/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2015 Intel Corporation
 */
6 #include <linux/if_ether.h>
7 #include <linux/if_vlan.h>
8 #include <linux/virtio_net.h>
9 #include <linux/virtio_ring.h>
10 #include <sys/param.h>
13 #include <rte_ethdev.h>
15 #include <rte_string_fns.h>
17 #include <rte_malloc.h>
23 #include "rte_vhost.h"
25 #include "vxlan_setup.h"
/* Header sizes, in bytes, of the outer IPv4, UDP and VXLAN headers. */
#define IPV4_HEADER_LEN 20
#define UDP_HEADER_LEN 8
#define VXLAN_HEADER_LEN 8

/*
 * Pieces of the IPv4 version_ihl byte; IP_VHL_DEF is the value that
 * vxlan_link() stores in the outer header.
 */
#define IP_VERSION 0x40
#define IP_HDRLEN 0x05 /* default IP header length == five 32-bits words. */
#define IP_DEFTTL 64 /* from RFC 1340. */
#define IP_VHL_DEF (IP_VERSION | IP_HDRLEN)

/* Value vxlan_link() stores in the outer header's fragment_offset field. */
#define IP_DN_FRAGMENT_FLAG 0x0040

/* Used to compare MAC addresses. */
#define MAC_ADDR_CMP 0xFFFFFFFFFFFFULL

/* Configurable number of RX/TX ring descriptors */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 512

/* Default inner VLAN ID */
#define INNER_VLAN_ID 100
49 struct vxlan_conf vxdev
;
51 struct ipv4_hdr app_ip_hdr
[VXLAN_N_PORTS
];
52 struct ether_hdr app_l2_hdr
[VXLAN_N_PORTS
];
/*
 * local VTEP IP address, one row per port. vxlan_link() packs octet i of
 * the selected row into bits [8*i, 8*i+7] of the 32-bit address.
 */
uint8_t vxlan_multicast_ips[2][4] = { {239, 1, 1, 1 }, {239, 1, 2, 1 } };

/* Remote VTEP IP address, one row per port; packed the same way. */
uint8_t vxlan_overlay_ips[2][4] = { {192, 168, 10, 1}, {192, 168, 30, 1} };

/* Remote VTEP MAC address */
uint8_t peer_mac[6] = {0x00, 0x11, 0x01, 0x00, 0x00, 0x01};
63 /* VXLAN RX filter type */
64 uint8_t tep_filter_type
[] = {RTE_TUNNEL_FILTER_IMAC_TENID
,
65 RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID
,
66 RTE_TUNNEL_FILTER_OMAC_TENID_IMAC
,};
68 /* Options for configuring ethernet port */
69 static struct rte_eth_conf port_conf
= {
72 .offloads
= DEV_RX_OFFLOAD_CRC_STRIP
,
75 .mq_mode
= ETH_MQ_TX_NONE
,
76 .offloads
= (DEV_TX_OFFLOAD_IPV4_CKSUM
|
77 DEV_TX_OFFLOAD_UDP_CKSUM
|
78 DEV_TX_OFFLOAD_TCP_CKSUM
|
79 DEV_TX_OFFLOAD_SCTP_CKSUM
|
80 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM
|
81 DEV_TX_OFFLOAD_TCP_TSO
|
82 DEV_TX_OFFLOAD_MULTI_SEGS
|
83 DEV_TX_OFFLOAD_VXLAN_TNL_TSO
),
/**
 * The one or two device(s) that belongs to the same tenant ID can
 * be assigned in a VM.
 *
 * The table is indexed by a device's RX queue (vdev->rx_q); consecutive
 * pairs of entries share one tenant ID.
 */
const uint16_t tenant_id_conf[] = {
	1000, 1000, 1001, 1001, 1002, 1002, 1003, 1003,
	1004, 1004, 1005, 1005, 1006, 1006, 1007, 1007,
	1008, 1008, 1009, 1009, 1010, 1010, 1011, 1011,
	1012, 1012, 1013, 1013, 1014, 1014, 1015, 1015,
	1016, 1016, 1017, 1017, 1018, 1018, 1019, 1019,
	1020, 1020, 1021, 1021, 1022, 1022, 1023, 1023,
	1024, 1024, 1025, 1025, 1026, 1026, 1027, 1027,
	1028, 1028, 1029, 1029, 1030, 1030, 1031, 1031,
};
103 * Initialises a given port using global settings and with the rx buffers
104 * coming from the mbuf_pool passed as parameter
107 vxlan_port_init(uint16_t port
, struct rte_mempool
*mbuf_pool
)
111 struct rte_eth_dev_info dev_info
;
112 uint16_t rx_rings
, tx_rings
= (uint16_t)rte_lcore_count();
113 uint16_t rx_ring_size
= RTE_TEST_RX_DESC_DEFAULT
;
114 uint16_t tx_ring_size
= RTE_TEST_TX_DESC_DEFAULT
;
115 struct rte_eth_udp_tunnel tunnel_udp
;
116 struct rte_eth_rxconf
*rxconf
;
117 struct rte_eth_txconf
*txconf
;
118 struct vxlan_conf
*pconf
= &vxdev
;
119 struct rte_eth_conf local_port_conf
= port_conf
;
121 pconf
->dst_port
= udp_port
;
123 rte_eth_dev_info_get(port
, &dev_info
);
125 if (dev_info
.max_rx_queues
> MAX_QUEUES
) {
126 rte_exit(EXIT_FAILURE
,
127 "please define MAX_QUEUES no less than %u in %s\n",
128 dev_info
.max_rx_queues
, __FILE__
);
131 rxconf
= &dev_info
.default_rxconf
;
132 txconf
= &dev_info
.default_txconf
;
134 if (!rte_eth_dev_is_valid_port(port
))
137 rx_rings
= nb_devices
;
138 if (dev_info
.tx_offload_capa
& DEV_TX_OFFLOAD_MBUF_FAST_FREE
)
139 local_port_conf
.txmode
.offloads
|=
140 DEV_TX_OFFLOAD_MBUF_FAST_FREE
;
141 /* Configure ethernet device. */
142 retval
= rte_eth_dev_configure(port
, rx_rings
, tx_rings
,
147 retval
= rte_eth_dev_adjust_nb_rx_tx_desc(port
, &rx_ring_size
,
152 /* Setup the queues. */
153 rxconf
->offloads
= local_port_conf
.rxmode
.offloads
;
154 for (q
= 0; q
< rx_rings
; q
++) {
155 retval
= rte_eth_rx_queue_setup(port
, q
, rx_ring_size
,
156 rte_eth_dev_socket_id(port
),
162 txconf
->offloads
= local_port_conf
.txmode
.offloads
;
163 for (q
= 0; q
< tx_rings
; q
++) {
164 retval
= rte_eth_tx_queue_setup(port
, q
, tx_ring_size
,
165 rte_eth_dev_socket_id(port
),
171 /* Start the device. */
172 retval
= rte_eth_dev_start(port
);
176 /* Configure UDP port for UDP tunneling */
177 tunnel_udp
.udp_port
= udp_port
;
178 tunnel_udp
.prot_type
= RTE_TUNNEL_TYPE_VXLAN
;
179 retval
= rte_eth_dev_udp_tunnel_port_add(port
, &tunnel_udp
);
182 rte_eth_macaddr_get(port
, &ports_eth_addr
[port
]);
183 RTE_LOG(INFO
, PORT
, "Port %u MAC: %02"PRIx8
" %02"PRIx8
" %02"PRIx8
184 " %02"PRIx8
" %02"PRIx8
" %02"PRIx8
"\n",
186 ports_eth_addr
[port
].addr_bytes
[0],
187 ports_eth_addr
[port
].addr_bytes
[1],
188 ports_eth_addr
[port
].addr_bytes
[2],
189 ports_eth_addr
[port
].addr_bytes
[3],
190 ports_eth_addr
[port
].addr_bytes
[4],
191 ports_eth_addr
[port
].addr_bytes
[5]);
193 if (tso_segsz
!= 0) {
194 struct rte_eth_dev_info dev_info
;
195 rte_eth_dev_info_get(port
, &dev_info
);
196 if ((dev_info
.tx_offload_capa
& DEV_TX_OFFLOAD_TCP_TSO
) == 0)
197 RTE_LOG(WARNING
, PORT
,
198 "hardware TSO offload is not supported\n");
204 vxlan_rx_process(struct rte_mbuf
*pkt
)
209 ret
= decapsulation(pkt
);
215 vxlan_tx_process(uint8_t queue_id
, struct rte_mbuf
*pkt
)
218 encapsulation(pkt
, queue_id
);
224 * This function learns the MAC address of the device and set init
225 * L2 header and L3 header info.
228 vxlan_link(struct vhost_dev
*vdev
, struct rte_mbuf
*m
)
231 struct ether_hdr
*pkt_hdr
;
232 uint64_t portid
= vdev
->vid
;
235 struct rte_eth_tunnel_filter_conf tunnel_filter_conf
;
237 if (unlikely(portid
>= VXLAN_N_PORTS
)) {
238 RTE_LOG(INFO
, VHOST_DATA
,
239 "(%d) WARNING: Not configuring device,"
240 "as already have %d ports for VXLAN.",
241 vdev
->vid
, VXLAN_N_PORTS
);
245 /* Learn MAC address of guest device from packet */
246 pkt_hdr
= rte_pktmbuf_mtod(m
, struct ether_hdr
*);
247 if (is_same_ether_addr(&(pkt_hdr
->s_addr
), &vdev
->mac_address
)) {
248 RTE_LOG(INFO
, VHOST_DATA
,
249 "(%d) WARNING: This device is using an existing"
250 " MAC address and has not been registered.\n",
255 for (i
= 0; i
< ETHER_ADDR_LEN
; i
++) {
256 vdev
->mac_address
.addr_bytes
[i
] =
257 vxdev
.port
[portid
].vport_mac
.addr_bytes
[i
] =
258 pkt_hdr
->s_addr
.addr_bytes
[i
];
259 vxdev
.port
[portid
].peer_mac
.addr_bytes
[i
] = peer_mac
[i
];
262 memset(&tunnel_filter_conf
, 0,
263 sizeof(struct rte_eth_tunnel_filter_conf
));
265 ether_addr_copy(&ports_eth_addr
[0], &tunnel_filter_conf
.outer_mac
);
266 tunnel_filter_conf
.filter_type
= tep_filter_type
[filter_idx
];
269 ether_addr_copy(&vdev
->mac_address
, &tunnel_filter_conf
.inner_mac
);
271 tunnel_filter_conf
.queue_id
= vdev
->rx_q
;
272 tunnel_filter_conf
.tenant_id
= tenant_id_conf
[vdev
->rx_q
];
274 if (tep_filter_type
[filter_idx
] == RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID
)
275 tunnel_filter_conf
.inner_vlan
= INNER_VLAN_ID
;
277 tunnel_filter_conf
.tunnel_type
= RTE_TUNNEL_TYPE_VXLAN
;
279 ret
= rte_eth_dev_filter_ctrl(ports
[0],
280 RTE_ETH_FILTER_TUNNEL
,
282 &tunnel_filter_conf
);
284 RTE_LOG(ERR
, VHOST_DATA
,
285 "%d Failed to add device MAC address to cloud filter\n",
290 /* Print out inner MAC and VNI info. */
291 RTE_LOG(INFO
, VHOST_DATA
,
292 "(%d) MAC_ADDRESS %02x:%02x:%02x:%02x:%02x:%02x and VNI %d registered\n",
294 vdev
->mac_address
.addr_bytes
[0],
295 vdev
->mac_address
.addr_bytes
[1],
296 vdev
->mac_address
.addr_bytes
[2],
297 vdev
->mac_address
.addr_bytes
[3],
298 vdev
->mac_address
.addr_bytes
[4],
299 vdev
->mac_address
.addr_bytes
[5],
300 tenant_id_conf
[vdev
->rx_q
]);
302 vxdev
.port
[portid
].vport_id
= portid
;
304 for (i
= 0; i
< 4; i
++) {
306 vxdev
.port_ip
|= vxlan_multicast_ips
[portid
][i
] << (8 * i
);
308 vxdev
.port
[portid
].peer_ip
|=
309 vxlan_overlay_ips
[portid
][i
] << (8 * i
);
312 vxdev
.out_key
= tenant_id_conf
[vdev
->rx_q
];
313 ether_addr_copy(&vxdev
.port
[portid
].peer_mac
,
314 &app_l2_hdr
[portid
].d_addr
);
315 ether_addr_copy(&ports_eth_addr
[0],
316 &app_l2_hdr
[portid
].s_addr
);
317 app_l2_hdr
[portid
].ether_type
= rte_cpu_to_be_16(ETHER_TYPE_IPv4
);
319 ip
= &app_ip_hdr
[portid
];
320 ip
->version_ihl
= IP_VHL_DEF
;
321 ip
->type_of_service
= 0;
322 ip
->total_length
= 0;
324 ip
->fragment_offset
= IP_DN_FRAGMENT_FLAG
;
325 ip
->time_to_live
= IP_DEFTTL
;
326 ip
->next_proto_id
= IPPROTO_UDP
;
327 ip
->hdr_checksum
= 0;
328 ip
->src_addr
= vxdev
.port_ip
;
329 ip
->dst_addr
= vxdev
.port
[portid
].peer_ip
;
331 /* Set device as ready for RX. */
332 vdev
->ready
= DEVICE_RX
;
338 * Removes cloud filter. Ensures that nothing is adding buffers to the RX
339 * queue before disabling RX on the device.
342 vxlan_unlink(struct vhost_dev
*vdev
)
344 unsigned i
= 0, rx_count
;
346 struct rte_mbuf
*pkts_burst
[MAX_PKT_BURST
];
347 struct rte_eth_tunnel_filter_conf tunnel_filter_conf
;
349 if (vdev
->ready
== DEVICE_RX
) {
350 memset(&tunnel_filter_conf
, 0,
351 sizeof(struct rte_eth_tunnel_filter_conf
));
353 ether_addr_copy(&ports_eth_addr
[0], &tunnel_filter_conf
.outer_mac
);
354 ether_addr_copy(&vdev
->mac_address
, &tunnel_filter_conf
.inner_mac
);
355 tunnel_filter_conf
.tenant_id
= tenant_id_conf
[vdev
->rx_q
];
356 tunnel_filter_conf
.filter_type
= tep_filter_type
[filter_idx
];
358 if (tep_filter_type
[filter_idx
] ==
359 RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID
)
360 tunnel_filter_conf
.inner_vlan
= INNER_VLAN_ID
;
362 tunnel_filter_conf
.queue_id
= vdev
->rx_q
;
363 tunnel_filter_conf
.tunnel_type
= RTE_TUNNEL_TYPE_VXLAN
;
365 ret
= rte_eth_dev_filter_ctrl(ports
[0],
366 RTE_ETH_FILTER_TUNNEL
,
367 RTE_ETH_FILTER_DELETE
,
368 &tunnel_filter_conf
);
370 RTE_LOG(ERR
, VHOST_DATA
,
371 "%d Failed to add device MAC address to cloud filter\n",
375 for (i
= 0; i
< ETHER_ADDR_LEN
; i
++)
376 vdev
->mac_address
.addr_bytes
[i
] = 0;
378 /* Clear out the receive buffers */
379 rx_count
= rte_eth_rx_burst(ports
[0],
380 (uint16_t)vdev
->rx_q
,
381 pkts_burst
, MAX_PKT_BURST
);
384 for (i
= 0; i
< rx_count
; i
++)
385 rte_pktmbuf_free(pkts_burst
[i
]);
387 rx_count
= rte_eth_rx_burst(ports
[0],
388 (uint16_t)vdev
->rx_q
,
389 pkts_burst
, MAX_PKT_BURST
);
391 vdev
->ready
= DEVICE_MAC_LEARNING
;
/*
 * Transmit packets after encapsulating.
 *
 * Every packet in tx_pkts is passed through vxlan_tx_process() and the
 * whole array is then handed to rte_eth_tx_burst() on the given port and
 * queue.
 *
 * Returns the value reported by rte_eth_tx_burst().
 */
int
vxlan_tx_pkts(uint16_t port_id, uint16_t queue_id,
		struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	uint16_t i;

	for (i = 0; i < nb_pkts; i++)
		vxlan_tx_process(queue_id, tx_pkts[i]);

	return rte_eth_tx_burst(port_id, queue_id, tx_pkts, nb_pkts);
}
410 /* Check for decapsulation and pass packets directly to VIRTIO device */
412 vxlan_rx_pkts(int vid
, struct rte_mbuf
**pkts_burst
, uint32_t rx_count
)
417 struct rte_mbuf
*pkts_valid
[rx_count
];
419 for (i
= 0; i
< rx_count
; i
++) {
422 &dev_statistics
[vid
].rx_bad_ip_csum
,
423 (pkts_burst
[i
]->ol_flags
& PKT_RX_IP_CKSUM_BAD
)
426 &dev_statistics
[vid
].rx_bad_ip_csum
,
427 (pkts_burst
[i
]->ol_flags
& PKT_RX_L4_CKSUM_BAD
)
430 ret
= vxlan_rx_process(pkts_burst
[i
]);
431 if (unlikely(ret
< 0))
434 pkts_valid
[count
] = pkts_burst
[i
];
438 ret
= rte_vhost_enqueue_burst(vid
, VIRTIO_RXQ
, pkts_valid
, count
);