/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016-2017 Intel Corporation
 */

#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_mbuf.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_malloc.h>
#include <rte_bus_vdev.h>
#include <rte_kvargs.h>
#include <rte_net.h>
#include <rte_debug.h>
#include <rte_ip.h>
#include <rte_string_fns.h>
#include <rte_ethdev.h>
#include <rte_errno.h>
#include <rte_cycles.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/utsname.h>
#include <sys/uio.h>
#include <errno.h>
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <linux/if_tun.h>
#include <linux/if_ether.h>
#include <fcntl.h>
#include <ctype.h>

#include <tap_rss.h>
#include <rte_eth_tap.h>
#include <tap_flow.h>
#include <tap_netlink.h>
#include <tap_tcmsgs.h>

/* Linux based path to the TUN device */
#define TUN_TAP_DEV_PATH	"/dev/net/tun"
#define DEFAULT_TAP_NAME	"dtap"
#define DEFAULT_TUN_NAME	"dtun"

#define ETH_TAP_IFACE_ARG	"iface"
#define ETH_TAP_REMOTE_ARG	"remote"
#define ETH_TAP_MAC_ARG		"mac"
#define ETH_TAP_MAC_FIXED	"fixed"

#define ETH_TAP_USR_MAC_FMT	"xx:xx:xx:xx:xx:xx"
#define ETH_TAP_CMP_MAC_FMT	"0123456789ABCDEFabcdef"
#define ETH_TAP_MAC_ARG_FMT	ETH_TAP_MAC_FIXED "|" ETH_TAP_USR_MAC_FMT

#define TAP_GSO_MBUFS_PER_CORE	128
#define TAP_GSO_MBUF_SEG_SIZE	128
#define TAP_GSO_MBUF_CACHE_SIZE	4
#define TAP_GSO_MBUFS_NUM \
	(TAP_GSO_MBUFS_PER_CORE * TAP_GSO_MBUF_CACHE_SIZE)

/* IPC key for queue fds sync */
#define TAP_MP_KEY "tap_mp_sync_queues"

#define TAP_IOV_DEFAULT_MAX 1024

static int tap_devices_count;

static const char *valid_arguments[] = {
	ETH_TAP_IFACE_ARG,
	ETH_TAP_REMOTE_ARG,
	ETH_TAP_MAC_ARG,
	NULL
};

static volatile uint32_t tap_trigger;	/* Rx trigger */

static struct rte_eth_link pmd_link = {
	.link_speed = ETH_SPEED_NUM_10G,
	.link_duplex = ETH_LINK_FULL_DUPLEX,
	.link_status = ETH_LINK_DOWN,
	.link_autoneg = ETH_LINK_FIXED,
};

static void
tap_trigger_cb(int sig __rte_unused)
{
	/* Valid trigger values are nonzero */
	tap_trigger = (tap_trigger + 1) | 0x80000000;
}

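/*
 * Editor's note on the Rx trigger (inferred from the code in this file):
 * the SIGIO handler above only bumps the volatile tap_trigger counter; it
 * is installed in tun_alloc() and fired by the kernel whenever a queue fd
 * becomes readable (O_ASYNC). pmd_rx_burst() compares tap_trigger with the
 * per-queue trigger_seen value and returns immediately when they match, so
 * an idle polling loop avoids one readv() syscall per empty burst.
 */
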
/* Specifies on what netdevices the ioctl should be applied */
enum ioctl_mode {
	LOCAL_AND_REMOTE = 0,
	LOCAL_ONLY,
	REMOTE_ONLY,
};

/* Message header to synchronize queues via IPC */
struct ipc_queues {
	char port_name[RTE_DEV_NAME_MAX_LEN];
	int rxq_count;
	int txq_count;
	/*
	 * The file descriptors are in the dedicated part
	 * of the Unix message to be translated by the kernel.
	 */
};

static int tap_intr_handle_set(struct rte_eth_dev *dev, int set);

/**
 * Tun/Tap allocation routine
 *
 * @param[in] pmd
 *   Pointer to private structure.
 *
 * @param[in] is_keepalive
 *   Keepalive flag
 *
 * @return
 *   -1 on failure, fd on success
 */
static int
tun_alloc(struct pmd_internals *pmd, int is_keepalive)
{
	struct ifreq ifr;
#ifdef IFF_MULTI_QUEUE
	unsigned int features;
#endif
	int fd;

	memset(&ifr, 0, sizeof(struct ifreq));

	/*
	 * Do not set IFF_NO_PI as packet information header will be needed
	 * to check if a received packet has been truncated.
	 */
	ifr.ifr_flags = (pmd->type == ETH_TUNTAP_TYPE_TAP) ?
		IFF_TAP : IFF_TUN | IFF_POINTOPOINT;
	strlcpy(ifr.ifr_name, pmd->name, IFNAMSIZ);

	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
	if (fd < 0) {
		TAP_LOG(ERR, "Unable to open %s interface", TUN_TAP_DEV_PATH);
		goto error;
	}

#ifdef IFF_MULTI_QUEUE
	/* Grab the TUN features to verify we can work multi-queue */
	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
		TAP_LOG(ERR, "unable to get TUN/TAP features");
		goto error;
	}
	TAP_LOG(DEBUG, "%s Features %08x", TUN_TAP_DEV_PATH, features);

	if (features & IFF_MULTI_QUEUE) {
		TAP_LOG(DEBUG, "  Multi-queue support for %d queues",
			RTE_PMD_TAP_MAX_QUEUES);
		ifr.ifr_flags |= IFF_MULTI_QUEUE;
	} else
#endif
	{
		ifr.ifr_flags |= IFF_ONE_QUEUE;
		TAP_LOG(DEBUG, "  Single queue only support");
	}

	/* Set the TUN/TAP configuration and set the name if needed */
	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
		TAP_LOG(WARNING, "Unable to set TUNSETIFF for %s: %s",
			ifr.ifr_name, strerror(errno));
		goto error;
	}

	/*
	 * Name passed to kernel might be a wildcard like dtun%d,
	 * so the resulting device name must be read back.
	 */
	TAP_LOG(DEBUG, "Device name is '%s'", ifr.ifr_name);
	strlcpy(pmd->name, ifr.ifr_name, RTE_ETH_NAME_MAX_LEN);

	if (is_keepalive) {
		/*
		 * Detach the TUN/TAP keep-alive queue
		 * to avoid traffic through it
		 */
		ifr.ifr_flags = IFF_DETACH_QUEUE;
		if (ioctl(fd, TUNSETQUEUE, (void *)&ifr) < 0) {
			TAP_LOG(WARNING,
				"Unable to detach keep-alive queue for %s: %s",
				ifr.ifr_name, strerror(errno));
			goto error;
		}
	}

	/* Always set the file descriptor to non-blocking */
	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
		TAP_LOG(WARNING,
			"Unable to set %s to nonblocking: %s",
			ifr.ifr_name, strerror(errno));
		goto error;
	}

	/* Set up trigger to optimize empty Rx bursts */
	errno = 0;
	do {
		struct sigaction sa;
		int flags = fcntl(fd, F_GETFL);

		if (flags == -1 || sigaction(SIGIO, NULL, &sa) == -1)
			break;
		if (sa.sa_handler != tap_trigger_cb) {
			/*
			 * Make sure SIGIO is not already taken. This is done
			 * as late as possible to leave the application a
			 * chance to set up its own signal handler first.
			 */
			if (sa.sa_handler != SIG_IGN &&
			    sa.sa_handler != SIG_DFL) {
				errno = EBUSY;
				break;
			}
			sa = (struct sigaction){
				.sa_flags = SA_RESTART,
				.sa_handler = tap_trigger_cb,
			};
			if (sigaction(SIGIO, &sa, NULL) == -1)
				break;
		}
		/* Enable SIGIO on file descriptor */
		fcntl(fd, F_SETFL, flags | O_ASYNC);
		fcntl(fd, F_SETOWN, getpid());
	} while (0);

	if (errno) {
		/* Disable trigger globally in case of error */
		tap_trigger = 0;
		TAP_LOG(WARNING, "Rx trigger disabled: %s",
			strerror(errno));
	}

	return fd;

error:
	if (fd >= 0)
		close(fd);
	return -1;
}

static void
tap_verify_csum(struct rte_mbuf *mbuf)
{
	uint32_t l2 = mbuf->packet_type & RTE_PTYPE_L2_MASK;
	uint32_t l3 = mbuf->packet_type & RTE_PTYPE_L3_MASK;
	uint32_t l4 = mbuf->packet_type & RTE_PTYPE_L4_MASK;
	unsigned int l2_len = sizeof(struct rte_ether_hdr);
	unsigned int l3_len;
	uint16_t cksum = 0;
	void *l3_hdr;
	void *l4_hdr;

	if (l2 == RTE_PTYPE_L2_ETHER_VLAN)
		l2_len += 4;
	else if (l2 == RTE_PTYPE_L2_ETHER_QINQ)
		l2_len += 8;
	/* Don't verify checksum for packets with discontinuous L2 header */
	if (unlikely(l2_len + sizeof(struct rte_ipv4_hdr) >
		     rte_pktmbuf_data_len(mbuf)))
		return;
	l3_hdr = rte_pktmbuf_mtod_offset(mbuf, void *, l2_len);
	if (l3 == RTE_PTYPE_L3_IPV4 || l3 == RTE_PTYPE_L3_IPV4_EXT) {
		struct rte_ipv4_hdr *iph = l3_hdr;

		/* ihl contains the number of 4-byte words in the header */
		l3_len = 4 * (iph->version_ihl & 0xf);
		if (unlikely(l2_len + l3_len > rte_pktmbuf_data_len(mbuf)))
			return;
		/* check that the total length reported by header is not
		 * greater than the total received size
		 */
		if (l2_len + rte_be_to_cpu_16(iph->total_length) >
				rte_pktmbuf_data_len(mbuf))
			return;

		cksum = ~rte_raw_cksum(iph, l3_len);
		mbuf->ol_flags |= cksum ?
			PKT_RX_IP_CKSUM_BAD :
			PKT_RX_IP_CKSUM_GOOD;
	} else if (l3 == RTE_PTYPE_L3_IPV6) {
		struct rte_ipv6_hdr *iph = l3_hdr;

		l3_len = sizeof(struct rte_ipv6_hdr);
		/* check that the total length reported by header is not
		 * greater than the total received size
		 */
		if (l2_len + l3_len + rte_be_to_cpu_16(iph->payload_len) >
				rte_pktmbuf_data_len(mbuf))
			return;
	} else {
		/* IPv6 extensions are not supported */
		return;
	}
	if (l4 == RTE_PTYPE_L4_UDP || l4 == RTE_PTYPE_L4_TCP) {
		l4_hdr = rte_pktmbuf_mtod_offset(mbuf, void *, l2_len + l3_len);
		/* Don't verify checksum for multi-segment packets. */
		if (mbuf->nb_segs > 1)
			return;
		if (l3 == RTE_PTYPE_L3_IPV4)
			cksum = ~rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
		else if (l3 == RTE_PTYPE_L3_IPV6)
			cksum = ~rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
		mbuf->ol_flags |= cksum ?
			PKT_RX_L4_CKSUM_BAD :
			PKT_RX_L4_CKSUM_GOOD;
	}
}

static uint64_t
tap_rx_offload_get_port_capa(void)
{
	/*
	 * No specific port Rx offload capabilities.
	 */
	return 0;
}

static uint64_t
tap_rx_offload_get_queue_capa(void)
{
	return DEV_RX_OFFLOAD_SCATTER |
	       DEV_RX_OFFLOAD_IPV4_CKSUM |
	       DEV_RX_OFFLOAD_UDP_CKSUM |
	       DEV_RX_OFFLOAD_TCP_CKSUM;
}

static void
tap_rxq_pool_free(struct rte_mbuf *pool)
{
	struct rte_mbuf *mbuf = pool;
	uint16_t nb_segs = 1;

	if (mbuf == NULL)
		return;

	while (mbuf->next) {
		mbuf = mbuf->next;
		nb_segs++;
	}
	pool->nb_segs = nb_segs;
	rte_pktmbuf_free(pool);
}

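/*
 * Editor's note on the Rx descriptor layout (see tap_rx_queue_setup()):
 * each Rx queue keeps a chain of pre-allocated mbufs in rxq->pool and a
 * matching iovec array, where iovecs[0] always points to the tun_pi
 * packet-information header and iovecs[1..n] point into the mbuf data
 * rooms. A single readv() therefore scatters one frame across as many
 * segments as needed when DEV_RX_OFFLOAD_SCATTER is enabled.
 */
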
/* Callback to handle the rx burst of packets to the correct interface and
 * file descriptor(s) in a multi-queue setup.
 */
static uint16_t
pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct rx_queue *rxq = queue;
	struct pmd_process_private *process_private;
	uint16_t num_rx;
	unsigned long num_rx_bytes = 0;
	uint32_t trigger = tap_trigger;

	if (trigger == rxq->trigger_seen)
		return 0;

	process_private = rte_eth_devices[rxq->in_port].process_private;
	for (num_rx = 0; num_rx < nb_pkts; ) {
		struct rte_mbuf *mbuf = rxq->pool;
		struct rte_mbuf *seg = NULL;
		struct rte_mbuf *new_tail = NULL;
		uint16_t data_off = rte_pktmbuf_headroom(mbuf);
		int len;

		len = readv(process_private->rxq_fds[rxq->queue_id],
			*rxq->iovecs,
			1 + (rxq->rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ?
			     rxq->nb_rx_desc : 1));
		if (len < (int)sizeof(struct tun_pi))
			break;

		/* Packet couldn't fit in the provided mbuf */
		if (unlikely(rxq->pi.flags & TUN_PKT_STRIP)) {
			rxq->stats.ierrors++;
			continue;
		}

		len -= sizeof(struct tun_pi);

		mbuf->pkt_len = len;
		mbuf->port = rxq->in_port;
		while (1) {
			struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);

			if (unlikely(!buf)) {
				rxq->stats.rx_nombuf++;
				/* No new buf has been allocated: do nothing */
				if (!new_tail || !seg)
					goto end;

				seg->next = NULL;
				tap_rxq_pool_free(mbuf);

				goto end;
			}
			seg = seg ? seg->next : mbuf;
			if (rxq->pool == mbuf)
				rxq->pool = buf;
			if (new_tail)
				new_tail->next = buf;
			new_tail = buf;
			new_tail->next = seg->next;

			/* iovecs[0] is reserved for packet info (pi) */
			(*rxq->iovecs)[mbuf->nb_segs].iov_len =
				buf->buf_len - data_off;
			(*rxq->iovecs)[mbuf->nb_segs].iov_base =
				(char *)buf->buf_addr + data_off;

			seg->data_len = RTE_MIN(seg->buf_len - data_off, len);
			seg->data_off = data_off;

			len -= seg->data_len;
			if (len <= 0)
				break;
			mbuf->nb_segs++;
			/* First segment has headroom, not the others */
			data_off = 0;
		}
		seg->next = NULL;
		mbuf->packet_type = rte_net_get_ptype(mbuf, NULL,
						      RTE_PTYPE_ALL_MASK);
		if (rxq->rxmode->offloads & DEV_RX_OFFLOAD_CHECKSUM)
			tap_verify_csum(mbuf);

		/* account for the received frame */
		bufs[num_rx++] = mbuf;
		num_rx_bytes += mbuf->pkt_len;
	}
end:
	rxq->stats.ipackets += num_rx;
	rxq->stats.ibytes += num_rx_bytes;

	if (trigger && num_rx < nb_pkts)
		rxq->trigger_seen = trigger;

	return num_rx;
}

static uint64_t
tap_tx_offload_get_port_capa(void)
{
	/*
	 * No specific port Tx offload capabilities.
	 */
	return 0;
}

static uint64_t
tap_tx_offload_get_queue_capa(void)
{
	return DEV_TX_OFFLOAD_MULTI_SEGS |
	       DEV_TX_OFFLOAD_IPV4_CKSUM |
	       DEV_TX_OFFLOAD_UDP_CKSUM |
	       DEV_TX_OFFLOAD_TCP_CKSUM |
	       DEV_TX_OFFLOAD_TCP_TSO;
}

/* Finalize l4 checksum calculation */
static void
tap_tx_l4_cksum(uint16_t *l4_cksum, uint16_t l4_phdr_cksum,
		uint32_t l4_raw_cksum)
{
	if (l4_cksum) {
		uint32_t cksum;

		cksum = __rte_raw_cksum_reduce(l4_raw_cksum);
		cksum += l4_phdr_cksum;

		cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
		cksum = (~cksum) & 0xffff;
		if (cksum == 0)
			cksum = 0xffff;
		*l4_cksum = cksum;
	}
}

/* Accumulate L4 raw checksums */
static void
tap_tx_l4_add_rcksum(char *l4_data, unsigned int l4_len, uint16_t *l4_cksum,
			uint32_t *l4_raw_cksum)
{
	if (l4_cksum == NULL)
		return;

	*l4_raw_cksum = __rte_raw_cksum(l4_data, l4_len, *l4_raw_cksum);
}

/* L3 and L4 pseudo headers checksum offloads */
static void
tap_tx_l3_cksum(char *packet, uint64_t ol_flags, unsigned int l2_len,
		unsigned int l3_len, unsigned int l4_len, uint16_t **l4_cksum,
		uint16_t *l4_phdr_cksum, uint32_t *l4_raw_cksum)
{
	void *l3_hdr = packet + l2_len;

	if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_IPV4)) {
		struct rte_ipv4_hdr *iph = l3_hdr;
		uint16_t cksum;

		iph->hdr_checksum = 0;
		cksum = rte_raw_cksum(iph, l3_len);
		iph->hdr_checksum = (cksum == 0xffff) ? cksum : ~cksum;
	}
	if (ol_flags & PKT_TX_L4_MASK) {
		void *l4_hdr;

		l4_hdr = packet + l2_len + l3_len;
		if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM)
			*l4_cksum = &((struct rte_udp_hdr *)l4_hdr)->dgram_cksum;
		else if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM)
			*l4_cksum = &((struct rte_tcp_hdr *)l4_hdr)->cksum;
		else
			return;
		**l4_cksum = 0;
		if (ol_flags & PKT_TX_IPV4)
			*l4_phdr_cksum = rte_ipv4_phdr_cksum(l3_hdr, 0);
		else
			*l4_phdr_cksum = rte_ipv6_phdr_cksum(l3_hdr, 0);
		*l4_raw_cksum = __rte_raw_cksum(l4_hdr, l4_len, 0);
	}
}

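/*
 * Editor's note on the Tx checksum path implemented below: the headers are
 * copied to a small stack buffer (m_copy) so the mbuf itself is not
 * modified, the IPv4 header checksum is finalized there, and the L4
 * checksum is built incrementally: a pseudo-header checksum plus raw sums
 * accumulated over each payload iovec, folded at the end by
 * tap_tx_l4_cksum().
 */
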
static int
tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
			struct rte_mbuf **pmbufs,
			uint16_t *num_packets, unsigned long *num_tx_bytes)
{
	int i;
	uint16_t l234_hlen;
	struct pmd_process_private *process_private;

	process_private = rte_eth_devices[txq->out_port].process_private;

	for (i = 0; i < num_mbufs; i++) {
		struct rte_mbuf *mbuf = pmbufs[i];
		struct iovec iovecs[mbuf->nb_segs + 2];
		struct tun_pi pi = { .flags = 0, .proto = 0x00 };
		struct rte_mbuf *seg = mbuf;
		char m_copy[mbuf->data_len];
		int proto;
		int n;
		int j;
		int k; /* current index in iovecs for copying segments */
		uint16_t seg_len; /* length of first segment */
		uint16_t nb_segs;
		uint16_t *l4_cksum; /* l4 checksum (pseudo header + payload) */
		uint32_t l4_raw_cksum = 0; /* TCP/UDP payload raw checksum */
		uint16_t l4_phdr_cksum = 0; /* TCP/UDP pseudo header checksum */
		uint16_t is_cksum = 0; /* in case cksum should be offloaded */

		l4_cksum = NULL;
		if (txq->type == ETH_TUNTAP_TYPE_TUN) {
			/*
			 * TUN and TAP are created with IFF_NO_PI disabled.
			 * For the TUN PMD this is mandatory, as the fields are
			 * used by the kernel tun.c to determine whether the
			 * packet is IP or non-IP.
			 *
			 * The logic fetches the first byte of data from the
			 * mbuf and checks whether it is v4 or v6. If the first
			 * nibble is 4 or 6, the protocol field is updated.
			 */
			char *buff_data = rte_pktmbuf_mtod(seg, void *);
			proto = (*buff_data & 0xf0);
			pi.proto = (proto == 0x40) ?
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) :
				((proto == 0x60) ?
					rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) :
					0x00);
		}

		k = 0;
		iovecs[k].iov_base = &pi;
		iovecs[k].iov_len = sizeof(pi);
		k++;

		nb_segs = mbuf->nb_segs;
		if (txq->csum &&
		    ((mbuf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_IPV4) ||
		     (mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM ||
		     (mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM))) {
			is_cksum = 1;

			/* Support only packets with at least layer 4
			 * header included in the first segment
			 */
			seg_len = rte_pktmbuf_data_len(mbuf);
			l234_hlen = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
			if (seg_len < l234_hlen)
				return -1;

			/* To change checksums, work on a copy of the l2 and
			 * l3 headers + l4 pseudo header
			 */
			rte_memcpy(m_copy, rte_pktmbuf_mtod(mbuf, void *),
					l234_hlen);
			tap_tx_l3_cksum(m_copy, mbuf->ol_flags,
					mbuf->l2_len, mbuf->l3_len, mbuf->l4_len,
					&l4_cksum, &l4_phdr_cksum,
					&l4_raw_cksum);
			iovecs[k].iov_base = m_copy;
			iovecs[k].iov_len = l234_hlen;
			k++;

			/* Update next iovecs[] beyond l2, l3, l4 headers */
			if (seg_len > l234_hlen) {
				iovecs[k].iov_len = seg_len - l234_hlen;
				iovecs[k].iov_base =
					rte_pktmbuf_mtod(seg, char *) +
						l234_hlen;
				tap_tx_l4_add_rcksum(iovecs[k].iov_base,
					iovecs[k].iov_len, l4_cksum,
					&l4_raw_cksum);
				k++;
				nb_segs++;
			}
			seg = seg->next;
		}

		for (j = k; j <= nb_segs; j++) {
			iovecs[j].iov_len = rte_pktmbuf_data_len(seg);
			iovecs[j].iov_base = rte_pktmbuf_mtod(seg, void *);
			if (is_cksum)
				tap_tx_l4_add_rcksum(iovecs[j].iov_base,
					iovecs[j].iov_len, l4_cksum,
					&l4_raw_cksum);
			seg = seg->next;
		}

		if (is_cksum)
			tap_tx_l4_cksum(l4_cksum, l4_phdr_cksum, l4_raw_cksum);

		/* copy the tx frame data */
		n = writev(process_private->txq_fds[txq->queue_id], iovecs, j);
		if (n <= 0)
			return -1;

		(*num_packets)++;
		(*num_tx_bytes) += rte_pktmbuf_pkt_len(mbuf);
	}
	return 0;
}

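/*
 * Editor's note: pmd_tx_burst() below handles TSO in software. When
 * PKT_TX_TCP_SEG is set it runs the packet through the rte_gso library
 * (using the per-queue gso_ctx) and writes each resulting segment with
 * tap_write_mbufs(); non-TSO packets are size-checked against the MTU and
 * written directly.
 */
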
/* Callback to handle sending packets from the tap interface
 */
static uint16_t
pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct tx_queue *txq = queue;
	uint16_t num_tx = 0;
	uint16_t num_packets = 0;
	unsigned long num_tx_bytes = 0;
	uint32_t max_size;
	int i;

	if (unlikely(nb_pkts == 0))
		return 0;

	struct rte_mbuf *gso_mbufs[MAX_GSO_MBUFS];
	max_size = *txq->mtu + (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + 4);
	for (i = 0; i < nb_pkts; i++) {
		struct rte_mbuf *mbuf_in = bufs[num_tx];
		struct rte_mbuf **mbuf;
		uint16_t num_mbufs = 0;
		uint16_t tso_segsz = 0;
		int ret;
		int num_tso_mbufs;
		uint16_t hdrs_len;
		uint64_t tso;

		tso = mbuf_in->ol_flags & PKT_TX_TCP_SEG;
		if (tso) {
			struct rte_gso_ctx *gso_ctx = &txq->gso_ctx;

			/* TCP segmentation implies TCP checksum offload */
			mbuf_in->ol_flags |= PKT_TX_TCP_CKSUM;

			/* gso size is calculated without RTE_ETHER_CRC_LEN */
			hdrs_len = mbuf_in->l2_len + mbuf_in->l3_len +
					mbuf_in->l4_len;
			tso_segsz = mbuf_in->tso_segsz + hdrs_len;
			if (unlikely(tso_segsz == hdrs_len) ||
				tso_segsz > *txq->mtu) {
				txq->stats.errs++;
				break;
			}
			gso_ctx->gso_size = tso_segsz;
			/* 'mbuf_in' packet to segment */
			num_tso_mbufs = rte_gso_segment(mbuf_in,
				gso_ctx, /* gso control block */
				(struct rte_mbuf **)&gso_mbufs, /* out mbufs */
				RTE_DIM(gso_mbufs)); /* max tso mbufs */

			/* ret contains the number of new created mbufs */
			if (num_tso_mbufs < 0)
				break;

			mbuf = gso_mbufs;
			num_mbufs = num_tso_mbufs;
		} else {
			/* stats.errs will be incremented */
			if (rte_pktmbuf_pkt_len(mbuf_in) > max_size)
				break;

			/* ret 0 indicates no new mbufs were created */
			num_tso_mbufs = 0;
			mbuf = &mbuf_in;
			num_mbufs = 1;
		}

		ret = tap_write_mbufs(txq, num_mbufs, mbuf,
				&num_packets, &num_tx_bytes);
		if (ret == -1) {
			txq->stats.errs++;
			/* free tso mbufs */
			if (num_tso_mbufs > 0)
				rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs);
			break;
		}
		num_tx++;
		/* free original mbuf */
		rte_pktmbuf_free(mbuf_in);
		/* free tso mbufs */
		if (num_tso_mbufs > 0)
			rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs);
	}

	txq->stats.opackets += num_packets;
	txq->stats.errs += nb_pkts - num_tx;
	txq->stats.obytes += num_tx_bytes;

	return num_tx;
}

static const char *
tap_ioctl_req2str(unsigned long request)
{
	switch (request) {
	case SIOCSIFFLAGS:
		return "SIOCSIFFLAGS";
	case SIOCGIFFLAGS:
		return "SIOCGIFFLAGS";
	case SIOCGIFHWADDR:
		return "SIOCGIFHWADDR";
	case SIOCSIFHWADDR:
		return "SIOCSIFHWADDR";
	case SIOCSIFMTU:
		return "SIOCSIFMTU";
	}
	return "UNKNOWN";
}

static int
tap_ioctl(struct pmd_internals *pmd, unsigned long request,
	  struct ifreq *ifr, int set, enum ioctl_mode mode)
{
	short req_flags = ifr->ifr_flags;
	int remote = pmd->remote_if_index &&
		(mode == REMOTE_ONLY || mode == LOCAL_AND_REMOTE);

	if (!pmd->remote_if_index && mode == REMOTE_ONLY)
		return 0;
	/*
	 * If there is a remote netdevice, apply ioctl on it, then apply it on
	 * the tap netdevice.
	 */
apply:
	if (remote)
		strlcpy(ifr->ifr_name, pmd->remote_iface, IFNAMSIZ);
	else if (mode == LOCAL_ONLY || mode == LOCAL_AND_REMOTE)
		strlcpy(ifr->ifr_name, pmd->name, IFNAMSIZ);
	switch (request) {
	case SIOCSIFFLAGS:
		/* fetch current flags to leave other flags untouched */
		if (ioctl(pmd->ioctl_sock, SIOCGIFFLAGS, ifr) < 0)
			goto error;
		if (set)
			ifr->ifr_flags |= req_flags;
		else
			ifr->ifr_flags &= ~req_flags;
		break;
	case SIOCGIFFLAGS:
	case SIOCGIFHWADDR:
	case SIOCSIFHWADDR:
	case SIOCSIFMTU:
		break;
	default:
		TAP_LOG(WARNING, "%s: ioctl() called with wrong arg",
			pmd->name);
		return -EINVAL;
	}
	if (ioctl(pmd->ioctl_sock, request, ifr) < 0)
		goto error;
	if (remote-- && mode == LOCAL_AND_REMOTE)
		goto apply;
	return 0;

error:
	TAP_LOG(DEBUG, "%s(%s) failed: %s(%d)", ifr->ifr_name,
		tap_ioctl_req2str(request), strerror(errno), errno);
	return -errno;
}

static int
tap_link_set_down(struct rte_eth_dev *dev)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct ifreq ifr = { .ifr_flags = IFF_UP };

	dev->data->dev_link.link_status = ETH_LINK_DOWN;
	return tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_ONLY);
}

static int
tap_link_set_up(struct rte_eth_dev *dev)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct ifreq ifr = { .ifr_flags = IFF_UP };

	dev->data->dev_link.link_status = ETH_LINK_UP;
	return tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
}

static int
tap_dev_start(struct rte_eth_dev *dev)
{
	int err, i;

	err = tap_intr_handle_set(dev, 1);
	if (err)
		return err;

	err = tap_link_set_up(dev);
	if (err)
		return err;

	for (i = 0; i < dev->data->nb_tx_queues; i++)
		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
	for (i = 0; i < dev->data->nb_rx_queues; i++)
		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;

	return err;
}

/* This function gets called when the current port gets stopped.
 */
static void
tap_dev_stop(struct rte_eth_dev *dev)
{
	int i;

	for (i = 0; i < dev->data->nb_tx_queues; i++)
		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
	for (i = 0; i < dev->data->nb_rx_queues; i++)
		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;

	tap_intr_handle_set(dev, 0);
	tap_link_set_down(dev);
}

static int
tap_dev_configure(struct rte_eth_dev *dev)
{
	struct pmd_internals *pmd = dev->data->dev_private;

	if (dev->data->nb_rx_queues > RTE_PMD_TAP_MAX_QUEUES) {
		TAP_LOG(ERR,
			"%s: number of rx queues %d exceeds max num of queues %d",
			dev->device->name,
			dev->data->nb_rx_queues,
			RTE_PMD_TAP_MAX_QUEUES);
		return -1;
	}
	if (dev->data->nb_tx_queues > RTE_PMD_TAP_MAX_QUEUES) {
		TAP_LOG(ERR,
			"%s: number of tx queues %d exceeds max num of queues %d",
			dev->device->name,
			dev->data->nb_tx_queues,
			RTE_PMD_TAP_MAX_QUEUES);
		return -1;
	}

	TAP_LOG(INFO, "%s: %s: TX configured queues number: %u",
		dev->device->name, pmd->name, dev->data->nb_tx_queues);

	TAP_LOG(INFO, "%s: %s: RX configured queues number: %u",
		dev->device->name, pmd->name, dev->data->nb_rx_queues);

	return 0;
}

static uint32_t
tap_dev_speed_capa(void)
{
	uint32_t speed = pmd_link.link_speed;
	uint32_t capa = 0;

	if (speed >= ETH_SPEED_NUM_10M)
		capa |= ETH_LINK_SPEED_10M;
	if (speed >= ETH_SPEED_NUM_100M)
		capa |= ETH_LINK_SPEED_100M;
	if (speed >= ETH_SPEED_NUM_1G)
		capa |= ETH_LINK_SPEED_1G;
	if (speed >= ETH_SPEED_NUM_2_5G)
		capa |= ETH_LINK_SPEED_2_5G;
	if (speed >= ETH_SPEED_NUM_5G)
		capa |= ETH_LINK_SPEED_5G;
	if (speed >= ETH_SPEED_NUM_10G)
		capa |= ETH_LINK_SPEED_10G;
	if (speed >= ETH_SPEED_NUM_20G)
		capa |= ETH_LINK_SPEED_20G;
	if (speed >= ETH_SPEED_NUM_25G)
		capa |= ETH_LINK_SPEED_25G;
	if (speed >= ETH_SPEED_NUM_40G)
		capa |= ETH_LINK_SPEED_40G;
	if (speed >= ETH_SPEED_NUM_50G)
		capa |= ETH_LINK_SPEED_50G;
	if (speed >= ETH_SPEED_NUM_56G)
		capa |= ETH_LINK_SPEED_56G;
	if (speed >= ETH_SPEED_NUM_100G)
		capa |= ETH_LINK_SPEED_100G;

	return capa;
}

static int
tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
	struct pmd_internals *internals = dev->data->dev_private;

	dev_info->if_index = internals->if_index;
	dev_info->max_mac_addrs = 1;
	dev_info->max_rx_pktlen = (uint32_t)RTE_ETHER_MAX_VLAN_FRAME_LEN;
	dev_info->max_rx_queues = RTE_PMD_TAP_MAX_QUEUES;
	dev_info->max_tx_queues = RTE_PMD_TAP_MAX_QUEUES;
	dev_info->min_rx_bufsize = 0;
	dev_info->speed_capa = tap_dev_speed_capa();
	dev_info->rx_queue_offload_capa = tap_rx_offload_get_queue_capa();
	dev_info->rx_offload_capa = tap_rx_offload_get_port_capa() |
				    dev_info->rx_queue_offload_capa;
	dev_info->tx_queue_offload_capa = tap_tx_offload_get_queue_capa();
	dev_info->tx_offload_capa = tap_tx_offload_get_port_capa() |
				    dev_info->tx_queue_offload_capa;
	dev_info->hash_key_size = TAP_RSS_HASH_KEY_SIZE;
	/*
	 * limitation: TAP supports all of IP, UDP and TCP hash
	 * functions together and not in partial combinations
	 */
	dev_info->flow_type_rss_offloads = ~TAP_RSS_HF_MASK;

	return 0;
}

static int
tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
{
	unsigned int i, imax;
	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
	unsigned long rx_nombuf = 0, ierrors = 0;
	const struct pmd_internals *pmd = dev->data->dev_private;

	/* rx queue statistics */
	imax = (dev->data->nb_rx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
		dev->data->nb_rx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
	for (i = 0; i < imax; i++) {
		tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets;
		tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
		rx_total += tap_stats->q_ipackets[i];
		rx_bytes_total += tap_stats->q_ibytes[i];
		rx_nombuf += pmd->rxq[i].stats.rx_nombuf;
		ierrors += pmd->rxq[i].stats.ierrors;
	}

	/* tx queue statistics */
	imax = (dev->data->nb_tx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
		dev->data->nb_tx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;

	for (i = 0; i < imax; i++) {
		tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
		tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
		tx_total += tap_stats->q_opackets[i];
		tx_err_total += pmd->txq[i].stats.errs;
		tx_bytes_total += tap_stats->q_obytes[i];
	}

	tap_stats->ipackets = rx_total;
	tap_stats->ibytes = rx_bytes_total;
	tap_stats->ierrors = ierrors;
	tap_stats->rx_nombuf = rx_nombuf;
	tap_stats->opackets = tx_total;
	tap_stats->oerrors = tx_err_total;
	tap_stats->obytes = tx_bytes_total;
	return 0;
}

static int
tap_stats_reset(struct rte_eth_dev *dev)
{
	int i;
	struct pmd_internals *pmd = dev->data->dev_private;

	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
		pmd->rxq[i].stats.ipackets = 0;
		pmd->rxq[i].stats.ibytes = 0;
		pmd->rxq[i].stats.ierrors = 0;
		pmd->rxq[i].stats.rx_nombuf = 0;

		pmd->txq[i].stats.opackets = 0;
		pmd->txq[i].stats.errs = 0;
		pmd->txq[i].stats.obytes = 0;
	}

	return 0;
}

static void
tap_dev_close(struct rte_eth_dev *dev)
{
	int i;
	struct pmd_internals *internals = dev->data->dev_private;
	struct pmd_process_private *process_private = dev->process_private;
	struct rx_queue *rxq;

	tap_link_set_down(dev);
	if (internals->nlsk_fd != -1) {
		tap_flow_flush(dev, NULL);
		tap_flow_implicit_flush(internals, NULL);
		tap_nl_final(internals->nlsk_fd);
		internals->nlsk_fd = -1;
	}

	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
		if (process_private->rxq_fds[i] != -1) {
			rxq = &internals->rxq[i];
			close(process_private->rxq_fds[i]);
			process_private->rxq_fds[i] = -1;
			tap_rxq_pool_free(rxq->pool);
			rte_free(rxq->iovecs);
			rxq->pool = NULL;
			rxq->iovecs = NULL;
		}
		if (process_private->txq_fds[i] != -1) {
			close(process_private->txq_fds[i]);
			process_private->txq_fds[i] = -1;
		}
	}

	if (internals->remote_if_index) {
		/* Restore initial remote state */
		ioctl(internals->ioctl_sock, SIOCSIFFLAGS,
				&internals->remote_initial_flags);
	}

	if (internals->ka_fd != -1) {
		close(internals->ka_fd);
		internals->ka_fd = -1;
	}
	/*
	 * Since the TUN device has no more open file descriptors,
	 * it will be removed from the kernel.
	 */
}

static void
tap_rx_queue_release(void *queue)
{
	struct rx_queue *rxq = queue;
	struct pmd_process_private *process_private;

	if (!rxq)
		return;
	process_private = rte_eth_devices[rxq->in_port].process_private;
	if (process_private->rxq_fds[rxq->queue_id] != -1) {
		close(process_private->rxq_fds[rxq->queue_id]);
		process_private->rxq_fds[rxq->queue_id] = -1;
		tap_rxq_pool_free(rxq->pool);
		rte_free(rxq->iovecs);
		rxq->pool = NULL;
		rxq->iovecs = NULL;
	}
}

static void
tap_tx_queue_release(void *queue)
{
	struct tx_queue *txq = queue;
	struct pmd_process_private *process_private;

	if (!txq)
		return;
	process_private = rte_eth_devices[txq->out_port].process_private;

	if (process_private->txq_fds[txq->queue_id] != -1) {
		close(process_private->txq_fds[txq->queue_id]);
		process_private->txq_fds[txq->queue_id] = -1;
	}
}

static int
tap_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused)
{
	struct rte_eth_link *dev_link = &dev->data->dev_link;
	struct pmd_internals *pmd = dev->data->dev_private;
	struct ifreq ifr = { .ifr_flags = 0 };

	if (pmd->remote_if_index) {
		tap_ioctl(pmd, SIOCGIFFLAGS, &ifr, 0, REMOTE_ONLY);
		if (!(ifr.ifr_flags & IFF_UP) ||
		    !(ifr.ifr_flags & IFF_RUNNING)) {
			dev_link->link_status = ETH_LINK_DOWN;
			return 0;
		}
	}
	tap_ioctl(pmd, SIOCGIFFLAGS, &ifr, 0, LOCAL_ONLY);
	dev_link->link_status =
		((ifr.ifr_flags & IFF_UP) && (ifr.ifr_flags & IFF_RUNNING) ?
		 ETH_LINK_UP :
		 ETH_LINK_DOWN);
	return 0;
}

static int
tap_promisc_enable(struct rte_eth_dev *dev)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct ifreq ifr = { .ifr_flags = IFF_PROMISC };
	int ret;

	ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
	if (ret != 0)
		return ret;

	if (pmd->remote_if_index && !pmd->flow_isolate) {
		dev->data->promiscuous = 1;
		ret = tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC);
		if (ret != 0) {
			/* Rollback promisc flag */
			tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE);
			/*
			 * rte_eth_dev_promiscuous_enable() rolls back
			 * dev->data->promiscuous in case of failure.
			 */
			return ret;
		}
	}

	return 0;
}

static int
tap_promisc_disable(struct rte_eth_dev *dev)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct ifreq ifr = { .ifr_flags = IFF_PROMISC };
	int ret;

	ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE);
	if (ret != 0)
		return ret;

	if (pmd->remote_if_index && !pmd->flow_isolate) {
		dev->data->promiscuous = 0;
		ret = tap_flow_implicit_destroy(pmd, TAP_REMOTE_PROMISC);
		if (ret != 0) {
			/* Rollback promisc flag */
			tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
			/*
			 * rte_eth_dev_promiscuous_disable() rolls back
			 * dev->data->promiscuous in case of failure.
			 */
			return ret;
		}
	}

	return 0;
}

static int
tap_allmulti_enable(struct rte_eth_dev *dev)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct ifreq ifr = { .ifr_flags = IFF_ALLMULTI };
	int ret;

	ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
	if (ret != 0)
		return ret;

	if (pmd->remote_if_index && !pmd->flow_isolate) {
		dev->data->all_multicast = 1;
		ret = tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI);
		if (ret != 0) {
			/* Rollback allmulti flag */
			tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE);
			/*
			 * rte_eth_dev_allmulticast_enable() rolls back
			 * dev->data->all_multicast in case of failure.
			 */
			return ret;
		}
	}

	return 0;
}

static int
tap_allmulti_disable(struct rte_eth_dev *dev)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct ifreq ifr = { .ifr_flags = IFF_ALLMULTI };
	int ret;

	ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE);
	if (ret != 0)
		return ret;

	if (pmd->remote_if_index && !pmd->flow_isolate) {
		dev->data->all_multicast = 0;
		ret = tap_flow_implicit_destroy(pmd, TAP_REMOTE_ALLMULTI);
		if (ret != 0) {
			/* Rollback allmulti flag */
			tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
			/*
			 * rte_eth_dev_allmulticast_disable() rolls back
			 * dev->data->all_multicast in case of failure.
			 */
			return ret;
		}
	}

	return 0;
}

static int
tap_mac_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	enum ioctl_mode mode = LOCAL_ONLY;
	struct ifreq ifr;
	int ret;

	if (pmd->type == ETH_TUNTAP_TYPE_TUN) {
		TAP_LOG(ERR, "%s: can't set MAC address for TUN",
			dev->device->name);
		return -ENOTSUP;
	}

	if (rte_is_zero_ether_addr(mac_addr)) {
		TAP_LOG(ERR, "%s: can't set an empty MAC address",
			dev->device->name);
		return -EINVAL;
	}
	/* Check the actual current MAC address on the tap netdevice */
	ret = tap_ioctl(pmd, SIOCGIFHWADDR, &ifr, 0, LOCAL_ONLY);
	if (ret < 0)
		return ret;
	if (rte_is_same_ether_addr(
			(struct rte_ether_addr *)&ifr.ifr_hwaddr.sa_data,
			mac_addr))
		return 0;
	/* Check the current MAC address on the remote */
	ret = tap_ioctl(pmd, SIOCGIFHWADDR, &ifr, 0, REMOTE_ONLY);
	if (ret < 0)
		return ret;
	if (!rte_is_same_ether_addr(
			(struct rte_ether_addr *)&ifr.ifr_hwaddr.sa_data,
			mac_addr))
		mode = LOCAL_AND_REMOTE;
	ifr.ifr_hwaddr.sa_family = AF_LOCAL;
	rte_memcpy(ifr.ifr_hwaddr.sa_data, mac_addr, RTE_ETHER_ADDR_LEN);
	ret = tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 1, mode);
	if (ret < 0)
		return ret;
	rte_memcpy(&pmd->eth_addr, mac_addr, RTE_ETHER_ADDR_LEN);
	if (pmd->remote_if_index && !pmd->flow_isolate) {
		/* Replace MAC redirection rule after a MAC change */
		ret = tap_flow_implicit_destroy(pmd, TAP_REMOTE_LOCAL_MAC);
		if (ret < 0) {
			TAP_LOG(ERR,
				"%s: Couldn't delete MAC redirection rule",
				dev->device->name);
			return ret;
		}
		ret = tap_flow_implicit_create(pmd, TAP_REMOTE_LOCAL_MAC);
		if (ret < 0) {
			TAP_LOG(ERR,
				"%s: Couldn't add MAC redirection rule",
				dev->device->name);
			return ret;
		}
	}

	return 0;
}

static int
tap_gso_ctx_setup(struct rte_gso_ctx *gso_ctx, struct rte_eth_dev *dev)
{
	uint32_t gso_types;
	char pool_name[64];
	/*
	 * Create a private mbuf pool with TAP_GSO_MBUF_SEG_SIZE bytes of
	 * data room per mbuf; use this pool for both direct and indirect
	 * mbufs.
	 */
	struct rte_mempool *mp;	/* Mempool for GSO packets */

	/* initialize GSO context */
	gso_types = DEV_TX_OFFLOAD_TCP_TSO;
	snprintf(pool_name, sizeof(pool_name), "mp_%s", dev->device->name);
	mp = rte_mempool_lookup((const char *)pool_name);
	if (!mp) {
		mp = rte_pktmbuf_pool_create(pool_name, TAP_GSO_MBUFS_NUM,
			TAP_GSO_MBUF_CACHE_SIZE, 0,
			RTE_PKTMBUF_HEADROOM + TAP_GSO_MBUF_SEG_SIZE,
			SOCKET_ID_ANY);
		if (!mp) {
			struct pmd_internals *pmd = dev->data->dev_private;

			TAP_LOG(ERR,
				"%s: failed to create mbuf pool for device %s",
				pmd->name, dev->device->name);
			return -1;
		}
	}

	gso_ctx->direct_pool = mp;
	gso_ctx->indirect_pool = mp;
	gso_ctx->gso_types = gso_types;
	gso_ctx->gso_size = 0; /* gso_size is set in tx_burst() per packet */
	gso_ctx->flag = 0;

	return 0;
}

static int
tap_setup_queue(struct rte_eth_dev *dev,
		struct pmd_internals *internals,
		uint16_t qid,
		int is_rx)
{
	int ret;
	int *fd;
	int *other_fd;
	const char *dir;
	struct pmd_internals *pmd = dev->data->dev_private;
	struct pmd_process_private *process_private = dev->process_private;
	struct rx_queue *rx = &internals->rxq[qid];
	struct tx_queue *tx = &internals->txq[qid];
	struct rte_gso_ctx *gso_ctx;

	if (is_rx) {
		fd = &process_private->rxq_fds[qid];
		other_fd = &process_private->txq_fds[qid];
		dir = "rx";
		gso_ctx = NULL;
	} else {
		fd = &process_private->txq_fds[qid];
		other_fd = &process_private->rxq_fds[qid];
		dir = "tx";
		gso_ctx = &tx->gso_ctx;
	}
	if (*fd != -1) {
		/* fd for this queue already exists */
		TAP_LOG(DEBUG, "%s: fd %d for %s queue qid %d exists",
			pmd->name, *fd, dir, qid);
		gso_ctx = NULL;
	} else if (*other_fd != -1) {
		/* Only other_fd exists. dup it */
		*fd = dup(*other_fd);
		if (*fd < 0) {
			*fd = -1;
			TAP_LOG(ERR, "%s: dup() failed.", pmd->name);
			return -1;
		}
		TAP_LOG(DEBUG, "%s: dup fd %d for %s queue qid %d (%d)",
			pmd->name, *other_fd, dir, qid, *fd);
	} else {
		/* Both RX and TX fds do not exist (equal -1). Create fd */
		*fd = tun_alloc(pmd, 0);
		if (*fd < 0) {
			*fd = -1; /* restore original value */
			TAP_LOG(ERR, "%s: tun_alloc() failed.", pmd->name);
			return -1;
		}
		TAP_LOG(DEBUG, "%s: add %s queue for qid %d fd %d",
			pmd->name, dir, qid, *fd);
	}

	tx->mtu = &dev->data->mtu;
	rx->rxmode = &dev->data->dev_conf.rxmode;
	if (gso_ctx) {
		ret = tap_gso_ctx_setup(gso_ctx, dev);
		if (ret)
			return -1;
	}

	tx->type = pmd->type;

	return *fd;
}

static int
tap_rx_queue_setup(struct rte_eth_dev *dev,
		   uint16_t rx_queue_id,
		   uint16_t nb_rx_desc,
		   unsigned int socket_id,
		   const struct rte_eth_rxconf *rx_conf __rte_unused,
		   struct rte_mempool *mp)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct pmd_process_private *process_private = dev->process_private;
	struct rx_queue *rxq = &internals->rxq[rx_queue_id];
	struct rte_mbuf **tmp = &rxq->pool;
	long iov_max = sysconf(_SC_IOV_MAX);

	if (iov_max <= 0) {
		TAP_LOG(WARNING,
			"_SC_IOV_MAX is not defined. Using %d as default",
			TAP_IOV_DEFAULT_MAX);
		iov_max = TAP_IOV_DEFAULT_MAX;
	}
	uint16_t nb_desc = RTE_MIN(nb_rx_desc, iov_max - 1);
	struct iovec (*iovecs)[nb_desc + 1];
	int data_off = RTE_PKTMBUF_HEADROOM;
	int ret = 0;
	int fd;
	int i;

	if (rx_queue_id >= dev->data->nb_rx_queues || !mp) {
		TAP_LOG(WARNING,
			"nb_rx_queues %d too small or mempool NULL",
			dev->data->nb_rx_queues);
		return -1;
	}

	rxq->mp = mp;
	rxq->trigger_seen = 1; /* force initial burst */
	rxq->in_port = dev->data->port_id;
	rxq->queue_id = rx_queue_id;
	rxq->nb_rx_desc = nb_desc;
	iovecs = rte_zmalloc_socket(dev->device->name, sizeof(*iovecs), 0,
				    socket_id);
	if (!iovecs) {
		TAP_LOG(WARNING,
			"%s: Couldn't allocate %d RX descriptors",
			dev->device->name, nb_desc);
		return -ENOMEM;
	}
	rxq->iovecs = iovecs;

	dev->data->rx_queues[rx_queue_id] = rxq;
	fd = tap_setup_queue(dev, internals, rx_queue_id, 1);
	if (fd == -1) {
		ret = fd;
		goto error;
	}

	(*rxq->iovecs)[0].iov_len = sizeof(struct tun_pi);
	(*rxq->iovecs)[0].iov_base = &rxq->pi;

	for (i = 1; i <= nb_desc; i++) {
		*tmp = rte_pktmbuf_alloc(rxq->mp);
		if (!*tmp) {
			TAP_LOG(WARNING,
				"%s: couldn't allocate memory for queue %d",
				dev->device->name, rx_queue_id);
			ret = -ENOMEM;
			goto error;
		}
		(*rxq->iovecs)[i].iov_len = (*tmp)->buf_len - data_off;
		(*rxq->iovecs)[i].iov_base =
			(char *)(*tmp)->buf_addr + data_off;
		data_off = 0;
		tmp = &(*tmp)->next;
	}

	TAP_LOG(DEBUG, "  RX TUNTAP device name %s, qid %d on fd %d",
		internals->name, rx_queue_id,
		process_private->rxq_fds[rx_queue_id]);

	return 0;

error:
	tap_rxq_pool_free(rxq->pool);
	rxq->pool = NULL;
	rte_free(rxq->iovecs);
	rxq->iovecs = NULL;
	return ret;
}

static int
tap_tx_queue_setup(struct rte_eth_dev *dev,
		   uint16_t tx_queue_id,
		   uint16_t nb_tx_desc __rte_unused,
		   unsigned int socket_id __rte_unused,
		   const struct rte_eth_txconf *tx_conf)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct pmd_process_private *process_private = dev->process_private;
	struct tx_queue *txq;
	int ret;
	uint64_t offloads;

	if (tx_queue_id >= dev->data->nb_tx_queues)
		return -1;
	dev->data->tx_queues[tx_queue_id] = &internals->txq[tx_queue_id];
	txq = dev->data->tx_queues[tx_queue_id];
	txq->out_port = dev->data->port_id;
	txq->queue_id = tx_queue_id;

	offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
	txq->csum = !!(offloads &
			(DEV_TX_OFFLOAD_IPV4_CKSUM |
			 DEV_TX_OFFLOAD_UDP_CKSUM |
			 DEV_TX_OFFLOAD_TCP_CKSUM));

	ret = tap_setup_queue(dev, internals, tx_queue_id, 0);
	if (ret == -1)
		return -1;
	TAP_LOG(DEBUG,
		"  TX TUNTAP device name %s, qid %d on fd %d csum %s",
		internals->name, tx_queue_id,
		process_private->txq_fds[tx_queue_id],
		txq->csum ? "on" : "off");

	return 0;
}

static int
tap_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct ifreq ifr = { .ifr_mtu = mtu };
	int err = 0;

	err = tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE);
	if (!err)
		dev->data->mtu = mtu;

	return err;
}

static int
tap_set_mc_addr_list(struct rte_eth_dev *dev __rte_unused,
		     struct rte_ether_addr *mc_addr_set __rte_unused,
		     uint32_t nb_mc_addr __rte_unused)
{
	/*
	 * Nothing to do actually: the tap has no filtering whatsoever, every
	 * packet is received.
	 */
	return 0;
}

static int
tap_nl_msg_handler(struct nlmsghdr *nh, void *arg)
{
	struct rte_eth_dev *dev = arg;
	struct pmd_internals *pmd = dev->data->dev_private;
	struct ifinfomsg *info = NLMSG_DATA(nh);

	if (nh->nlmsg_type != RTM_NEWLINK ||
	    (info->ifi_index != pmd->if_index &&
	     info->ifi_index != pmd->remote_if_index))
		return 0;
	return tap_link_update(dev, 0);
}

static void
tap_dev_intr_handler(void *cb_arg)
{
	struct rte_eth_dev *dev = cb_arg;
	struct pmd_internals *pmd = dev->data->dev_private;

	tap_nl_recv(pmd->intr_handle.fd, tap_nl_msg_handler, dev);
}

static int
tap_lsc_intr_handle_set(struct rte_eth_dev *dev, int set)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	int ret;

	/* In any case, disable interrupt if the conf is no longer there. */
	if (!dev->data->dev_conf.intr_conf.lsc) {
		if (pmd->intr_handle.fd != -1)
			goto clean;

		return 0;
	}
	if (set) {
		pmd->intr_handle.fd = tap_nl_init(RTMGRP_LINK);
		if (unlikely(pmd->intr_handle.fd == -1))
			return -EBADF;
		return rte_intr_callback_register(
			&pmd->intr_handle, tap_dev_intr_handler, dev);
	}

clean:
	do {
		ret = rte_intr_callback_unregister(&pmd->intr_handle,
			tap_dev_intr_handler, dev);
		if (ret >= 0) {
			break;
		} else if (ret == -EAGAIN) {
			rte_delay_ms(100);
		} else {
			TAP_LOG(ERR, "intr callback unregister failed: %d",
				ret);
			break;
		}
	} while (true);

	tap_nl_final(pmd->intr_handle.fd);
	pmd->intr_handle.fd = -1;

	return 0;
}

static int
tap_intr_handle_set(struct rte_eth_dev *dev, int set)
{
	int err;

	err = tap_lsc_intr_handle_set(dev, set);
	if (err < 0) {
		if (!set)
			tap_rx_intr_vec_set(dev, 0);
		return err;
	}
	err = tap_rx_intr_vec_set(dev, set);
	if (err && set)
		tap_lsc_intr_handle_set(dev, 0);
	return err;
}

static const uint32_t*
tap_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused)
{
	static const uint32_t ptypes[] = {
		RTE_PTYPE_INNER_L2_ETHER,
		RTE_PTYPE_INNER_L2_ETHER_VLAN,
		RTE_PTYPE_INNER_L2_ETHER_QINQ,
		RTE_PTYPE_INNER_L3_IPV4,
		RTE_PTYPE_INNER_L3_IPV4_EXT,
		RTE_PTYPE_INNER_L3_IPV6,
		RTE_PTYPE_INNER_L3_IPV6_EXT,
		RTE_PTYPE_INNER_L4_FRAG,
		RTE_PTYPE_INNER_L4_UDP,
		RTE_PTYPE_INNER_L4_TCP,
		RTE_PTYPE_INNER_L4_SCTP,
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L2_ETHER_VLAN,
		RTE_PTYPE_L2_ETHER_QINQ,
		RTE_PTYPE_L3_IPV4,
		RTE_PTYPE_L3_IPV4_EXT,
		RTE_PTYPE_L3_IPV6_EXT,
		RTE_PTYPE_L3_IPV6,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_SCTP,
	};

	return ptypes;
}

static int
tap_flow_ctrl_get(struct rte_eth_dev *dev __rte_unused,
		  struct rte_eth_fc_conf *fc_conf)
{
	fc_conf->mode = RTE_FC_NONE;
	return 0;
}

static int
tap_flow_ctrl_set(struct rte_eth_dev *dev __rte_unused,
		  struct rte_eth_fc_conf *fc_conf)
{
	if (fc_conf->mode != RTE_FC_NONE)
		return -ENOTSUP;
	return 0;
}

/**
 * DPDK callback to update the RSS hash configuration.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[in] rss_conf
 *   RSS configuration data.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
tap_rss_hash_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	if (rss_conf->rss_hf & TAP_RSS_HF_MASK) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (rss_conf->rss_key && rss_conf->rss_key_len) {
		/*
		 * Currently TAP RSS key is hard coded
		 * and cannot be updated
		 */
		TAP_LOG(ERR,
			"port %u RSS key cannot be updated",
			dev->data->port_id);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}

static int
tap_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;

	return 0;
}

static int
tap_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
	dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;

	return 0;
}

static int
tap_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;

	return 0;
}

static int
tap_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
	dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;

	return 0;
}

static const struct eth_dev_ops ops = {
	.dev_start              = tap_dev_start,
	.dev_stop               = tap_dev_stop,
	.dev_close              = tap_dev_close,
	.dev_configure          = tap_dev_configure,
	.dev_infos_get          = tap_dev_info,
	.rx_queue_setup         = tap_rx_queue_setup,
	.tx_queue_setup         = tap_tx_queue_setup,
	.rx_queue_start         = tap_rx_queue_start,
	.tx_queue_start         = tap_tx_queue_start,
	.rx_queue_stop          = tap_rx_queue_stop,
	.tx_queue_stop          = tap_tx_queue_stop,
	.rx_queue_release       = tap_rx_queue_release,
	.tx_queue_release       = tap_tx_queue_release,
	.flow_ctrl_get          = tap_flow_ctrl_get,
	.flow_ctrl_set          = tap_flow_ctrl_set,
	.link_update            = tap_link_update,
	.dev_set_link_up        = tap_link_set_up,
	.dev_set_link_down      = tap_link_set_down,
	.promiscuous_enable     = tap_promisc_enable,
	.promiscuous_disable    = tap_promisc_disable,
	.allmulticast_enable    = tap_allmulti_enable,
	.allmulticast_disable   = tap_allmulti_disable,
	.mac_addr_set           = tap_mac_set,
	.mtu_set                = tap_mtu_set,
	.set_mc_addr_list       = tap_set_mc_addr_list,
	.stats_get              = tap_stats_get,
	.stats_reset            = tap_stats_reset,
	.dev_supported_ptypes_get = tap_dev_supported_ptypes_get,
	.rss_hash_update        = tap_rss_hash_update,
	.filter_ctrl            = tap_dev_filter_ctrl,
};

static const char *tuntap_types[ETH_TUNTAP_TYPE_MAX] = {
	"UNKNOWN", "TUN", "TAP"
};

static int
eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
		   char *remote_iface, struct rte_ether_addr *mac_addr,
		   enum rte_tuntap_type type)
{
	int numa_node = rte_socket_id();
	struct rte_eth_dev *dev;
	struct pmd_internals *pmd;
	struct pmd_process_private *process_private;
	const char *tuntap_name = tuntap_types[type];
	struct rte_eth_dev_data *data;
	struct ifreq ifr;
	int i;

	TAP_LOG(DEBUG, "%s device on numa %u", tuntap_name, rte_socket_id());

	dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
	if (!dev) {
		TAP_LOG(ERR, "%s Unable to allocate device struct",
			tuntap_name);
		goto error_exit_nodev;
	}

	process_private = (struct pmd_process_private *)
		rte_zmalloc_socket(tap_name, sizeof(struct pmd_process_private),
			RTE_CACHE_LINE_SIZE, dev->device->numa_node);

	if (process_private == NULL) {
		TAP_LOG(ERR, "Failed to alloc memory for process private");
		return -1;
	}
	pmd = dev->data->dev_private;
	dev->process_private = process_private;
	strlcpy(pmd->name, tap_name, sizeof(pmd->name));
	pmd->type = type;
	pmd->ka_fd = -1;
	pmd->nlsk_fd = -1;

	pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0);
	if (pmd->ioctl_sock == -1) {
		TAP_LOG(ERR,
			"%s Unable to get a socket for management: %s",
			tuntap_name, strerror(errno));
		goto error_exit;
	}

	/* Setup some default values */
	data = dev->data;
	data->dev_private = pmd;
	data->dev_flags = RTE_ETH_DEV_INTR_LSC;
	data->numa_node = numa_node;

	data->dev_link = pmd_link;
	data->mac_addrs = &pmd->eth_addr;
	/* Set the number of RX and TX queues */
	data->nb_rx_queues = 0;
	data->nb_tx_queues = 0;

	dev->dev_ops = &ops;
	dev->rx_pkt_burst = pmd_rx_burst;
	dev->tx_pkt_burst = pmd_tx_burst;

	pmd->intr_handle.type = RTE_INTR_HANDLE_EXT;
	pmd->intr_handle.fd = -1;
	dev->intr_handle = &pmd->intr_handle;

	/* Presetup the fds to -1 as being not valid */
	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
		process_private->rxq_fds[i] = -1;
		process_private->txq_fds[i] = -1;
	}

	if (pmd->type == ETH_TUNTAP_TYPE_TAP) {
		if (rte_is_zero_ether_addr(mac_addr))
			rte_eth_random_addr((uint8_t *)&pmd->eth_addr);
		else
			rte_memcpy(&pmd->eth_addr, mac_addr, sizeof(*mac_addr));
	}

	/*
	 * Allocate a TUN device keep-alive file descriptor that will only be
	 * closed when the TUN device itself is closed or removed.
	 * This keep-alive file descriptor will guarantee that the TUN device
	 * exists even when all of its queues are closed
	 */
	pmd->ka_fd = tun_alloc(pmd, 1);
	if (pmd->ka_fd == -1) {
		TAP_LOG(ERR, "Unable to create %s interface", tuntap_name);
		goto error_exit;
	}
	TAP_LOG(DEBUG, "allocated %s", pmd->name);

	ifr.ifr_mtu = dev->data->mtu;
	if (tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE) < 0)
		goto error_exit;

	if (pmd->type == ETH_TUNTAP_TYPE_TAP) {
		memset(&ifr, 0, sizeof(struct ifreq));
		ifr.ifr_hwaddr.sa_family = AF_LOCAL;
		rte_memcpy(ifr.ifr_hwaddr.sa_data, &pmd->eth_addr,
				RTE_ETHER_ADDR_LEN);
		if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0)
			goto error_exit;
	}

	/*
	 * Set up everything related to rte_flow:
	 * - netlink socket
	 * - tap / remote if_index
	 * - mandatory QDISCs
	 * - rte_flow actual/implicit lists
	 * - implicit rules
	 */
	pmd->nlsk_fd = tap_nl_init(0);
	if (pmd->nlsk_fd == -1) {
		TAP_LOG(WARNING, "%s: failed to create netlink socket.",
			pmd->name);
		goto disable_rte_flow;
	}
	pmd->if_index = if_nametoindex(pmd->name);
	if (!pmd->if_index) {
		TAP_LOG(ERR, "%s: failed to get if_index.", pmd->name);
		goto disable_rte_flow;
	}
	if (qdisc_create_multiq(pmd->nlsk_fd, pmd->if_index) < 0) {
		TAP_LOG(ERR, "%s: failed to create multiq qdisc.",
			pmd->name);
		goto disable_rte_flow;
	}
	if (qdisc_create_ingress(pmd->nlsk_fd, pmd->if_index) < 0) {
		TAP_LOG(ERR, "%s: failed to create ingress qdisc.",
			pmd->name);
		goto disable_rte_flow;
	}
	LIST_INIT(&pmd->flows);

	if (strlen(remote_iface)) {
		pmd->remote_if_index = if_nametoindex(remote_iface);
		if (!pmd->remote_if_index) {
			TAP_LOG(ERR, "%s: failed to get %s if_index.",
				pmd->name, remote_iface);
			goto error_remote;
		}
		strlcpy(pmd->remote_iface, remote_iface, RTE_ETH_NAME_MAX_LEN);

		/* Save state of remote device */
		tap_ioctl(pmd, SIOCGIFFLAGS, &pmd->remote_initial_flags, 0, REMOTE_ONLY);

		/* Replicate remote MAC address */
		if (tap_ioctl(pmd, SIOCGIFHWADDR, &ifr, 0, REMOTE_ONLY) < 0) {
			TAP_LOG(ERR, "%s: failed to get %s MAC address.",
				pmd->name, pmd->remote_iface);
			goto error_remote;
		}
		rte_memcpy(&pmd->eth_addr, ifr.ifr_hwaddr.sa_data,
			   RTE_ETHER_ADDR_LEN);
		/* The desired MAC is already in ifreq after SIOCGIFHWADDR. */
		if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0) {
			TAP_LOG(ERR, "%s: failed to set %s MAC address.",
				pmd->name, remote_iface);
			goto error_remote;
		}

		/*
		 * Flush usually returns negative value because it tries to
		 * delete every QDISC (and on a running device, one QDISC at
		 * least is needed). Ignore negative return value.
		 */
		qdisc_flush(pmd->nlsk_fd, pmd->remote_if_index);
		if (qdisc_create_ingress(pmd->nlsk_fd,
					 pmd->remote_if_index) < 0) {
			TAP_LOG(ERR, "%s: failed to create ingress qdisc.",
				pmd->remote_iface);
			goto error_remote;
		}
		LIST_INIT(&pmd->implicit_flows);
		if (tap_flow_implicit_create(pmd, TAP_REMOTE_TX) < 0 ||
		    tap_flow_implicit_create(pmd, TAP_REMOTE_LOCAL_MAC) < 0 ||
		    tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCAST) < 0 ||
		    tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCASTV6) < 0) {
			TAP_LOG(ERR,
				"%s: failed to create implicit rules.",
				pmd->name);
			goto error_remote;
		}
	}

	rte_eth_dev_probing_finish(dev);
	return 0;

disable_rte_flow:
	TAP_LOG(ERR, " Disabling rte flow support: %s(%d)",
		strerror(errno), errno);
	if (strlen(remote_iface)) {
		TAP_LOG(ERR, "Remote feature requires flow support.");
		goto error_exit;
	}
	rte_eth_dev_probing_finish(dev);
	return 0;

error_remote:
	TAP_LOG(ERR, " Can't set up remote feature: %s(%d)",
		strerror(errno), errno);
	tap_flow_implicit_flush(pmd, NULL);

error_exit:
	if (pmd->nlsk_fd != -1)
		close(pmd->nlsk_fd);
	if (pmd->ka_fd != -1)
		close(pmd->ka_fd);
	if (pmd->ioctl_sock != -1)
		close(pmd->ioctl_sock);
	/* mac_addrs must not be freed alone because part of dev_private */
	dev->data->mac_addrs = NULL;
	rte_eth_dev_release_port(dev);

error_exit_nodev:
	TAP_LOG(ERR, "%s Unable to initialize %s",
		tuntap_name, rte_vdev_device_name(vdev));

	return -EINVAL;
}

/* make sure name is a possible Linux network device name */
static bool
is_valid_iface(const char *name)
{
	if (*name == '\0')
		return false;

	if (strnlen(name, IFNAMSIZ) == IFNAMSIZ)
		return false;

	while (*name) {
		if (*name == '/' || *name == ':' || isspace(*name))
			return false;
		name++;
	}
	return true;
}

static int
set_interface_name(const char *key __rte_unused,
		   const char *value,
		   void *extra_args)
{
	char *name = (char *)extra_args;

	if (value) {
		if (!is_valid_iface(value)) {
			TAP_LOG(ERR, "TAP invalid remote interface name (%s)",
				value);
			return -1;
		}
		strlcpy(name, value, RTE_ETH_NAME_MAX_LEN);
	} else {
		/* use tap%d which causes kernel to choose next available */
		strlcpy(name, DEFAULT_TAP_NAME "%d", RTE_ETH_NAME_MAX_LEN);
	}
	return 0;
}

static int
set_remote_iface(const char *key __rte_unused,
		 const char *value,
		 void *extra_args)
{
	char *name = (char *)extra_args;

	if (value) {
		if (!is_valid_iface(value)) {
			TAP_LOG(ERR, "TAP invalid remote interface name (%s)",
				value);
			return -1;
		}
		strlcpy(name, value, RTE_ETH_NAME_MAX_LEN);
	}

	return 0;
}

static int parse_user_mac(struct rte_ether_addr *user_mac,
		const char *value)
{
	unsigned int index = 0;
	char mac_temp[strlen(ETH_TAP_USR_MAC_FMT) + 1], *mac_byte = NULL;

	if (user_mac == NULL || value == NULL)
		return 0;

	strlcpy(mac_temp, value, sizeof(mac_temp));
	mac_byte = strtok(mac_temp, ":");

	while ((mac_byte != NULL) &&
	       (strlen(mac_byte) <= 2) &&
	       (strlen(mac_byte) == strspn(mac_byte,
					   ETH_TAP_CMP_MAC_FMT))) {
		user_mac->addr_bytes[index++] = strtoul(mac_byte, NULL, 16);
		mac_byte = strtok(NULL, ":");
	}

	return index;
}

static int
set_mac_type(const char *key __rte_unused,
	     const char *value,
	     void *extra_args)
{
	struct rte_ether_addr *user_mac = extra_args;

	if (!value)
		return 0;

	if (!strncasecmp(ETH_TAP_MAC_FIXED, value, strlen(ETH_TAP_MAC_FIXED))) {
		static int iface_idx;

		/* fixed mac = 00:64:74:61:70:<iface_idx> */
		memcpy((char *)user_mac->addr_bytes, "\0dtap",
			RTE_ETHER_ADDR_LEN);
		user_mac->addr_bytes[RTE_ETHER_ADDR_LEN - 1] =
			iface_idx++ + '0';
		goto success;
	}

	if (parse_user_mac(user_mac, value) != 6)
		goto error;
success:
	TAP_LOG(DEBUG, "TAP user MAC param (%s)", value);
	return 0;

error:
	TAP_LOG(ERR, "TAP user MAC (%s) is not in format (%s|%s)",
		value, ETH_TAP_MAC_FIXED, ETH_TAP_USR_MAC_FMT);
	return -1;
}

/*
 * Open a TUN interface device. The TUN PMD:
 * 1) sets tap_type to false,
 * 2) takes the iface as an argument,
 * 3) sets the link speed to 10G, as the interface is virtual.
 */
static int
rte_pmd_tun_probe(struct rte_vdev_device *dev)
{
	const char *name, *params;
	int ret;
	struct rte_kvargs *kvlist = NULL;
	char tun_name[RTE_ETH_NAME_MAX_LEN];
	char remote_iface[RTE_ETH_NAME_MAX_LEN];
	struct rte_eth_dev *eth_dev;

	name = rte_vdev_device_name(dev);
	params = rte_vdev_device_args(dev);
	memset(remote_iface, 0, RTE_ETH_NAME_MAX_LEN);

	if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
	    strlen(params) == 0) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			TAP_LOG(ERR, "Failed to probe %s", name);
			return -1;
		}
		eth_dev->dev_ops = &ops;
		eth_dev->device = &dev->device;
		rte_eth_dev_probing_finish(eth_dev);
		return 0;
	}

	/* use tun%d which causes kernel to choose next available */
	strlcpy(tun_name, DEFAULT_TUN_NAME "%d", RTE_ETH_NAME_MAX_LEN);

	if (params && (params[0] != '\0')) {
		TAP_LOG(DEBUG, "parameters (%s)", params);

		kvlist = rte_kvargs_parse(params, valid_arguments);
		if (kvlist) {
			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
				ret = rte_kvargs_process(kvlist,
					ETH_TAP_IFACE_ARG,
					&set_interface_name,
					tun_name);

				if (ret == -1)
					goto leave;
			}
		}
	}
	pmd_link.link_speed = ETH_SPEED_NUM_10G;

	TAP_LOG(DEBUG, "Initializing pmd_tun for %s", name);

	ret = eth_dev_tap_create(dev, tun_name, remote_iface, 0,
				 ETH_TUNTAP_TYPE_TUN);

leave:
	if (ret == -1) {
		TAP_LOG(ERR, "Failed to create pmd for %s as %s",
			name, tun_name);
	}
	rte_kvargs_free(kvlist);

	return ret;
}

/* Request queue file descriptors from secondary to primary. */
static int
tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
{
	int ret;
	struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
	struct rte_mp_msg request, *reply;
	struct rte_mp_reply replies;
	struct ipc_queues *request_param = (struct ipc_queues *)request.param;
	struct ipc_queues *reply_param;
	struct pmd_process_private *process_private = dev->process_private;
	int queue, fd_iterator;

	/* Prepare the request */
	memset(&request, 0, sizeof(request));
	strlcpy(request.name, TAP_MP_KEY, sizeof(request.name));
	strlcpy(request_param->port_name, port_name,
		sizeof(request_param->port_name));
	request.len_param = sizeof(*request_param);
	/* Send request and receive reply */
	ret = rte_mp_request_sync(&request, &replies, &timeout);
	if (ret < 0 || replies.nb_received != 1) {
		TAP_LOG(ERR, "Failed to request queues from primary: %d",
			rte_errno);
		return -1;
	}
	reply = &replies.msgs[0];
	reply_param = (struct ipc_queues *)reply->param;
	TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);

	/* Attach the queues from received file descriptors */
	if (reply_param->rxq_count + reply_param->txq_count != reply->num_fds) {
		TAP_LOG(ERR, "Unexpected number of fds received");
		return -1;
	}

	dev->data->nb_rx_queues = reply_param->rxq_count;
	dev->data->nb_tx_queues = reply_param->txq_count;
	fd_iterator = 0;
	for (queue = 0; queue < reply_param->rxq_count; queue++)
		process_private->rxq_fds[queue] = reply->fds[fd_iterator++];
	for (queue = 0; queue < reply_param->txq_count; queue++)
		process_private->txq_fds[queue] = reply->fds[fd_iterator++];

	return 0;
}

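/*
 * Editor's note: the two handlers around this point implement
 * multi-process support. A secondary process cannot reopen the
 * /dev/net/tun queues itself, so it sends a TAP_MP_KEY request and the
 * primary replies with the per-queue file descriptors in the dedicated fd
 * part of the IPC message (rte_mp carries them over a Unix socket as
 * SCM_RIGHTS ancillary data, letting the kernel translate the fd numbers).
 */
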
/* Send the queue file descriptors from the primary process to secondary. */
static int
tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
{
	struct rte_eth_dev *dev;
	struct pmd_process_private *process_private;
	struct rte_mp_msg reply;
	const struct ipc_queues *request_param =
		(const struct ipc_queues *)request->param;
	struct ipc_queues *reply_param =
		(struct ipc_queues *)reply.param;
	uint16_t port_id;
	int queue;
	int ret;

	/* Get requested port */
	TAP_LOG(DEBUG, "Received IPC request for %s", request_param->port_name);
	ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
	if (ret) {
		TAP_LOG(ERR, "Failed to get port id for %s",
			request_param->port_name);
		return -1;
	}
	dev = &rte_eth_devices[port_id];
	process_private = dev->process_private;

	/* Fill file descriptors for all queues */
	reply.num_fds = 0;
	reply_param->rxq_count = 0;
	if (dev->data->nb_rx_queues + dev->data->nb_tx_queues >
			RTE_MP_MAX_FD_NUM) {
		TAP_LOG(ERR, "Number of rx/tx queues exceeds max number of fds");
		return -1;
	}

	for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
		reply.fds[reply.num_fds++] = process_private->rxq_fds[queue];
		reply_param->rxq_count++;
	}
	RTE_ASSERT(reply_param->rxq_count == dev->data->nb_rx_queues);

	reply_param->txq_count = 0;
	for (queue = 0; queue < dev->data->nb_tx_queues; queue++) {
		reply.fds[reply.num_fds++] = process_private->txq_fds[queue];
		reply_param->txq_count++;
	}
	RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);

	/* Send reply */
	strlcpy(reply.name, request->name, sizeof(reply.name));
	strlcpy(reply_param->port_name, request_param->port_name,
		sizeof(reply_param->port_name));
	reply.len_param = sizeof(*reply_param);
	if (rte_mp_reply(&reply, peer) < 0) {
		TAP_LOG(ERR, "Failed to reply an IPC request to sync queues");
		return -1;
	}
	return 0;
}

/* Open a TAP interface device.
 */
static int
rte_pmd_tap_probe(struct rte_vdev_device *dev)
{
	const char *name, *params;
	int ret;
	struct rte_kvargs *kvlist = NULL;
	int speed;
	char tap_name[RTE_ETH_NAME_MAX_LEN];
	char remote_iface[RTE_ETH_NAME_MAX_LEN];
	struct rte_ether_addr user_mac = { .addr_bytes = {0} };
	struct rte_eth_dev *eth_dev;
	int tap_devices_count_increased = 0;

	name = rte_vdev_device_name(dev);
	params = rte_vdev_device_args(dev);

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			TAP_LOG(ERR, "Failed to probe %s", name);
			return -1;
		}
		eth_dev->dev_ops = &ops;
		eth_dev->device = &dev->device;
		eth_dev->rx_pkt_burst = pmd_rx_burst;
		eth_dev->tx_pkt_burst = pmd_tx_burst;
		if (!rte_eal_primary_proc_alive(NULL)) {
			TAP_LOG(ERR, "Primary process is missing");
			return -1;
		}
		eth_dev->process_private = (struct pmd_process_private *)
			rte_zmalloc_socket(name,
				sizeof(struct pmd_process_private),
				RTE_CACHE_LINE_SIZE,
				eth_dev->device->numa_node);
		if (eth_dev->process_private == NULL) {
			TAP_LOG(ERR,
				"Failed to alloc memory for process private");
			return -1;
		}

		ret = tap_mp_attach_queues(name, eth_dev);
		if (ret != 0)
			return -1;
		rte_eth_dev_probing_finish(eth_dev);
		return 0;
	}

	speed = ETH_SPEED_NUM_10G;

	/* use tap%d which causes kernel to choose next available */
	strlcpy(tap_name, DEFAULT_TAP_NAME "%d", RTE_ETH_NAME_MAX_LEN);
	memset(remote_iface, 0, RTE_ETH_NAME_MAX_LEN);

	if (params && (params[0] != '\0')) {
		TAP_LOG(DEBUG, "parameters (%s)", params);

		kvlist = rte_kvargs_parse(params, valid_arguments);
		if (kvlist) {
			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
				ret = rte_kvargs_process(kvlist,
							 ETH_TAP_IFACE_ARG,
							 &set_interface_name,
							 tap_name);
				if (ret == -1)
					goto leave;
			}

			if (rte_kvargs_count(kvlist, ETH_TAP_REMOTE_ARG) == 1) {
				ret = rte_kvargs_process(kvlist,
							 ETH_TAP_REMOTE_ARG,
							 &set_remote_iface,
							 remote_iface);
				if (ret == -1)
					goto leave;
			}

			if (rte_kvargs_count(kvlist, ETH_TAP_MAC_ARG) == 1) {
				ret = rte_kvargs_process(kvlist,
							 ETH_TAP_MAC_ARG,
							 &set_mac_type,
							 &user_mac);
				if (ret == -1)
					goto leave;
			}
		}
	}
	pmd_link.link_speed = speed;

	TAP_LOG(DEBUG, "Initializing pmd_tap for %s", name);

	/* Register IPC feed callback */
	if (!tap_devices_count) {
		ret = rte_mp_action_register(TAP_MP_KEY, tap_mp_sync_queues);
		if (ret < 0 && rte_errno != ENOTSUP) {
			TAP_LOG(ERR, "tap: Failed to register IPC callback: %s",
				strerror(rte_errno));
			goto leave;
		}
	}
	tap_devices_count++;
	tap_devices_count_increased = 1;
	ret = eth_dev_tap_create(dev, tap_name, remote_iface, &user_mac,
		ETH_TUNTAP_TYPE_TAP);

leave:
	if (ret == -1) {
		TAP_LOG(ERR, "Failed to create pmd for %s as %s",
			name, tap_name);
		if (tap_devices_count_increased == 1) {
			if (tap_devices_count == 1)
				rte_mp_action_unregister(TAP_MP_KEY);
			tap_devices_count--;
		}
	}
	rte_kvargs_free(kvlist);

	return ret;
}

/* detach a TUNTAP device.
 */
static int
rte_pmd_tap_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev = NULL;
	struct pmd_internals *internals;

	/* find the ethdev entry */
	eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
	if (!eth_dev)
		return -ENODEV;

	/* mac_addrs must not be freed alone because part of dev_private */
	eth_dev->data->mac_addrs = NULL;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return rte_eth_dev_release_port(eth_dev);

	tap_dev_close(eth_dev);

	internals = eth_dev->data->dev_private;
	TAP_LOG(DEBUG, "Closing %s Ethernet device on numa %u",
		tuntap_types[internals->type], rte_socket_id());

	close(internals->ioctl_sock);
	rte_free(eth_dev->process_private);
	if (tap_devices_count == 1)
		rte_mp_action_unregister(TAP_MP_KEY);
	tap_devices_count--;
	rte_eth_dev_release_port(eth_dev);

	return 0;
}

static struct rte_vdev_driver pmd_tun_drv = {
	.probe = rte_pmd_tun_probe,
	.remove = rte_pmd_tap_remove,
};

static struct rte_vdev_driver pmd_tap_drv = {
	.probe = rte_pmd_tap_probe,
	.remove = rte_pmd_tap_remove,
};

RTE_PMD_REGISTER_VDEV(net_tap, pmd_tap_drv);
RTE_PMD_REGISTER_VDEV(net_tun, pmd_tun_drv);
RTE_PMD_REGISTER_ALIAS(net_tap, eth_tap);
RTE_PMD_REGISTER_PARAM_STRING(net_tun,
			      ETH_TAP_IFACE_ARG "=<string> ");
RTE_PMD_REGISTER_PARAM_STRING(net_tap,
			      ETH_TAP_IFACE_ARG "=<string> "
			      ETH_TAP_MAC_ARG "=" ETH_TAP_MAC_ARG_FMT " "
			      ETH_TAP_REMOTE_ARG "=<string>");

int tap_logtype;

RTE_INIT(tap_init_log)
{
	tap_logtype = rte_log_register("pmd.net.tap");
	if (tap_logtype >= 0)
		rte_log_set_level(tap_logtype, RTE_LOG_NOTICE);
}