/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016 IGEL Co., Ltd.
 * Copyright(c) 2016-2018 Intel Corporation
 */
#include <unistd.h>
#include <pthread.h>
#include <stdbool.h>

#include <rte_mbuf.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
#include <rte_bus_vdev.h>
#include <rte_kvargs.h>
#include <rte_vhost.h>
#include <rte_spinlock.h>

#include "rte_eth_vhost.h"

static int vhost_logtype;

#define VHOST_LOG(level, ...) \
	rte_log(RTE_LOG_ ## level, vhost_logtype, __VA_ARGS__)

enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};

#define ETH_VHOST_IFACE_ARG		"iface"
#define ETH_VHOST_QUEUES_ARG		"queues"
#define ETH_VHOST_CLIENT_ARG		"client"
#define ETH_VHOST_DEQUEUE_ZERO_COPY	"dequeue-zero-copy"
#define ETH_VHOST_IOMMU_SUPPORT		"iommu-support"
#define ETH_VHOST_POSTCOPY_SUPPORT	"postcopy-support"
#define VHOST_MAX_PKT_BURST 32

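/*
 * These kvargs select the vhost-user socket path, queue count and feature
 * flags at probe time. A typical invocation (the socket path and values
 * below are illustrative, not defaults):
 *
 *   --vdev 'net_vhost0,iface=/tmp/vhost0.sock,queues=2,client=1'
 */
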
static const char *valid_arguments[] = {
	ETH_VHOST_IFACE_ARG,
	ETH_VHOST_QUEUES_ARG,
	ETH_VHOST_CLIENT_ARG,
	ETH_VHOST_DEQUEUE_ZERO_COPY,
	ETH_VHOST_IOMMU_SUPPORT,
	ETH_VHOST_POSTCOPY_SUPPORT,
	NULL
};

static struct ether_addr base_eth_addr = {
	.addr_bytes = {
		0x56 /* V */,
		0x48 /* H */,
		0x4F /* O */,
		0x53 /* S */,
		0x54 /* T */,
		0x00
	}
};

enum vhost_xstats_pkts {
	VHOST_UNDERSIZE_PKT = 0,
	VHOST_64_PKT,
	VHOST_65_TO_127_PKT,
	VHOST_128_TO_255_PKT,
	VHOST_256_TO_511_PKT,
	VHOST_512_TO_1023_PKT,
	VHOST_1024_TO_1522_PKT,
	VHOST_1523_TO_MAX_PKT,
	VHOST_BROADCAST_PKT,
	VHOST_MULTICAST_PKT,
	VHOST_UNICAST_PKT,
	VHOST_ERRORS_PKT,
	VHOST_ERRORS_FRAGMENTED,
	VHOST_ERRORS_JABBER,
	VHOST_UNKNOWN_PROTOCOL,
	VHOST_XSTATS_MAX,
};

struct vhost_stats {
	uint64_t pkts;
	uint64_t bytes;
	uint64_t missed_pkts;
	uint64_t xstats[VHOST_XSTATS_MAX];
};

struct vhost_queue {
	int vid;
	rte_atomic32_t allow_queuing;
	rte_atomic32_t while_queuing;
	struct pmd_internal *internal;
	struct rte_mempool *mb_pool;
	uint16_t port;
	uint16_t virtqueue_id;
	struct vhost_stats stats;
};

struct pmd_internal {
	rte_atomic32_t dev_attached;
	char *dev_name;
	char *iface_name;
	uint16_t max_queues;
	int vid;
	rte_atomic32_t started;
	uint8_t vlan_strip;
};

struct internal_list {
	TAILQ_ENTRY(internal_list) next;
	struct rte_eth_dev *eth_dev;
};

TAILQ_HEAD(internal_list_head, internal_list);
static struct internal_list_head internal_list =
	TAILQ_HEAD_INITIALIZER(internal_list);

static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;

static struct rte_eth_link pmd_link = {
		.link_speed = 10000,
		.link_duplex = ETH_LINK_FULL_DUPLEX,
		.link_status = ETH_LINK_DOWN
};

struct rte_vhost_vring_state {
	rte_spinlock_t lock;

	bool cur[RTE_MAX_QUEUES_PER_PORT * 2];
	bool seen[RTE_MAX_QUEUES_PER_PORT * 2];
	unsigned int index;
	unsigned int max_vring;
};

static struct rte_vhost_vring_state *vring_states[RTE_MAX_ETHPORTS];

#define VHOST_XSTATS_NAME_SIZE 64

struct vhost_xstats_name_off {
	char name[VHOST_XSTATS_NAME_SIZE];
	uint64_t offset;
};

/* [rx]_ is prepended to the name string here */
static const struct vhost_xstats_name_off vhost_rxport_stat_strings[] = {
	{"good_packets",
	 offsetof(struct vhost_queue, stats.pkts)},
	{"total_bytes",
	 offsetof(struct vhost_queue, stats.bytes)},
	{"missed_pkts",
	 offsetof(struct vhost_queue, stats.missed_pkts)},
	{"broadcast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
	{"multicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])},
	{"unicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])},
	{"undersize_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])},
	{"size_64_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])},
	{"size_65_to_127_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])},
	{"size_128_to_255_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])},
	{"size_256_to_511_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])},
	{"size_512_to_1023_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])},
	{"size_1024_to_1522_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])},
	{"size_1523_to_max_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])},
	{"errors_with_bad_CRC",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])},
	{"fragmented_errors",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_FRAGMENTED])},
	{"jabber_errors",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_JABBER])},
	{"unknown_protos_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNKNOWN_PROTOCOL])},
};

/* [tx]_ is prepended to the name string here */
static const struct vhost_xstats_name_off vhost_txport_stat_strings[] = {
	{"good_packets",
	 offsetof(struct vhost_queue, stats.pkts)},
	{"total_bytes",
	 offsetof(struct vhost_queue, stats.bytes)},
	{"missed_pkts",
	 offsetof(struct vhost_queue, stats.missed_pkts)},
	{"broadcast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
	{"multicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])},
	{"unicast_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])},
	{"undersize_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])},
	{"size_64_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])},
	{"size_65_to_127_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])},
	{"size_128_to_255_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])},
	{"size_256_to_511_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])},
	{"size_512_to_1023_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])},
	{"size_1024_to_1522_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])},
	{"size_1523_to_max_packets",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])},
	{"errors_with_bad_CRC",
	 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])},
};

#define VHOST_NB_XSTATS_RXPORT (sizeof(vhost_rxport_stat_strings) / \
				sizeof(vhost_rxport_stat_strings[0]))

#define VHOST_NB_XSTATS_TXPORT (sizeof(vhost_txport_stat_strings) / \
				sizeof(vhost_txport_stat_strings[0]))

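/*
 * The two tables above drive the xstats callbacks below: each entry pairs a
 * display name with the byte offset of a uint64_t counter inside struct
 * vhost_queue, so vhost_dev_xstats_get() can aggregate any counter across
 * queues with one pointer computation,
 *
 *   value += *(uint64_t *)((char *)vq + vhost_rxport_stat_strings[t].offset);
 *
 * instead of a per-statistic switch.
 */
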
static void
vhost_dev_xstats_reset(struct rte_eth_dev *dev)
{
	struct vhost_queue *vq = NULL;
	unsigned int i = 0;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		if (!vq)
			continue;
		memset(&vq->stats, 0, sizeof(vq->stats));
	}
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		if (!vq)
			continue;
		memset(&vq->stats, 0, sizeof(vq->stats));
	}
}

static int
vhost_dev_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
			   struct rte_eth_xstat_name *xstats_names,
			   unsigned int limit __rte_unused)
{
	unsigned int t = 0;
	int count = 0;
	int nstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT;

	if (!xstats_names)
		return nstats;
	for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
		snprintf(xstats_names[count].name,
			 sizeof(xstats_names[count].name),
			 "rx_%s", vhost_rxport_stat_strings[t].name);
		count++;
	}
	for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) {
		snprintf(xstats_names[count].name,
			 sizeof(xstats_names[count].name),
			 "tx_%s", vhost_txport_stat_strings[t].name);
		count++;
	}
	return count;
}

static int
vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
		     unsigned int n)
{
	unsigned int i;
	unsigned int t;
	unsigned int count = 0;
	struct vhost_queue *vq = NULL;
	unsigned int nxstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT;

	if (n < nxstats)
		return nxstats;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		if (!vq)
			continue;
		vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts
				- (vq->stats.xstats[VHOST_BROADCAST_PKT]
				   + vq->stats.xstats[VHOST_MULTICAST_PKT]);
	}
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		if (!vq)
			continue;
		vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts
				+ vq->stats.missed_pkts
				- (vq->stats.xstats[VHOST_BROADCAST_PKT]
				   + vq->stats.xstats[VHOST_MULTICAST_PKT]);
	}
	for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
		xstats[count].value = 0;
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			vq = dev->data->rx_queues[i];
			if (!vq)
				continue;
			xstats[count].value +=
				*(uint64_t *)(((char *)vq)
				+ vhost_rxport_stat_strings[t].offset);
		}
		xstats[count].id = count;
		count++;
	}
	for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) {
		xstats[count].value = 0;
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			vq = dev->data->tx_queues[i];
			if (!vq)
				continue;
			xstats[count].value +=
				*(uint64_t *)(((char *)vq)
				+ vhost_txport_stat_strings[t].offset);
		}
		xstats[count].id = count;
		count++;
	}
	return count;
}

static inline void
vhost_count_multicast_broadcast(struct vhost_queue *vq,
				struct rte_mbuf *mbuf)
{
	struct ether_addr *ea = NULL;
	struct vhost_stats *pstats = &vq->stats;

	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
	if (is_multicast_ether_addr(ea)) {
		if (is_broadcast_ether_addr(ea))
			pstats->xstats[VHOST_BROADCAST_PKT]++;
		else
			pstats->xstats[VHOST_MULTICAST_PKT]++;
	}
}

static void
vhost_update_packet_xstats(struct vhost_queue *vq,
			   struct rte_mbuf **bufs,
			   uint16_t count)
{
	uint32_t pkt_len = 0;
	uint64_t i = 0;
	uint64_t index;
	struct vhost_stats *pstats = &vq->stats;

	for (i = 0; i < count; i++) {
		pkt_len = bufs[i]->pkt_len;
		if (pkt_len == 64) {
			pstats->xstats[VHOST_64_PKT]++;
		} else if (pkt_len > 64 && pkt_len < 1024) {
			index = (sizeof(pkt_len) * 8)
				- __builtin_clz(pkt_len) - 5;
			pstats->xstats[index]++;
		} else {
			if (pkt_len < 64)
				pstats->xstats[VHOST_UNDERSIZE_PKT]++;
			else if (pkt_len <= 1522)
				pstats->xstats[VHOST_1024_TO_1522_PKT]++;
			else if (pkt_len > 1522)
				pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
		}
		vhost_count_multicast_broadcast(vq, bufs[i]);
	}
}

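/*
 * Note on the bucket computation above: for 64 < pkt_len < 1024 the bucket
 * index is floor(log2(pkt_len)) - 5, derived from the leading-zero count.
 * For example, pkt_len = 200 has its highest set bit at position 7
 * (__builtin_clz returns 24 for a 32-bit value), so index = 32 - 24 - 5 = 3,
 * which is VHOST_128_TO_255_PKT. This only works because the size-bucket
 * enumerators are declared in consecutive power-of-two order.
 */
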
static uint16_t
eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
	struct vhost_queue *r = q;
	uint16_t i, nb_rx = 0;
	uint16_t nb_receive = nb_bufs;

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		return 0;

	rte_atomic32_set(&r->while_queuing, 1);

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		goto out;

	/* Dequeue packets from guest TX queue */
	while (nb_receive) {
		uint16_t nb_pkts;
		uint16_t num = (uint16_t)RTE_MIN(nb_receive,
						 VHOST_MAX_PKT_BURST);

		nb_pkts = rte_vhost_dequeue_burst(r->vid, r->virtqueue_id,
						  r->mb_pool, &bufs[nb_rx],
						  num);

		nb_rx += nb_pkts;
		nb_receive -= nb_pkts;
		if (nb_pkts < num)
			break;
	}

	r->stats.pkts += nb_rx;

	for (i = 0; likely(i < nb_rx); i++) {
		bufs[i]->port = r->port;
		bufs[i]->vlan_tci = 0;

		if (r->internal->vlan_strip)
			rte_vlan_strip(bufs[i]);

		r->stats.bytes += bufs[i]->pkt_len;
	}

	vhost_update_packet_xstats(r, bufs, nb_rx);

out:
	rte_atomic32_set(&r->while_queuing, 0);

	return nb_rx;
}

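/*
 * Both burst functions use the same guard: check allow_queuing, raise
 * while_queuing, then re-check allow_queuing before touching the vhost
 * device. The second read closes the window in which a control-path thread
 * may have cleared allow_queuing between our first read and the
 * while_queuing store; see update_queuing_status() for the other side of
 * this handshake.
 */
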
static uint16_t
eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
	struct vhost_queue *r = q;
	uint16_t i, nb_tx = 0;
	uint16_t nb_send = 0;

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		return 0;

	rte_atomic32_set(&r->while_queuing, 1);

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		goto out;

	for (i = 0; i < nb_bufs; i++) {
		struct rte_mbuf *m = bufs[i];

		/* Do VLAN tag insertion */
		if (m->ol_flags & PKT_TX_VLAN_PKT) {
			int error = rte_vlan_insert(&m);
			if (unlikely(error)) {
				rte_pktmbuf_free(m);
				continue;
			}
		}

		bufs[nb_send] = m;
		++nb_send;
	}

	/* Enqueue packets to guest RX queue */
	while (nb_send) {
		uint16_t nb_pkts;
		uint16_t num = (uint16_t)RTE_MIN(nb_send,
						 VHOST_MAX_PKT_BURST);

		nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id,
						  &bufs[nb_tx], num);

		nb_tx += nb_pkts;
		nb_send -= nb_pkts;
		if (nb_pkts < num)
			break;
	}

	r->stats.pkts += nb_tx;
	r->stats.missed_pkts += nb_bufs - nb_tx;

	for (i = 0; likely(i < nb_tx); i++)
		r->stats.bytes += bufs[i]->pkt_len;

	vhost_update_packet_xstats(r, bufs, nb_tx);

	/* According to RFC 2863 (ifHCOutMulticastPkts and
	 * ifHCOutBroadcastPkts), the "multicast" and "broadcast" counters
	 * are incremented even when packets are not transmitted
	 * successfully.
	 */
	for (i = nb_tx; i < nb_bufs; i++)
		vhost_count_multicast_broadcast(r, bufs[i]);

	for (i = 0; likely(i < nb_tx); i++)
		rte_pktmbuf_free(bufs[i]);
out:
	rte_atomic32_set(&r->while_queuing, 0);

	return nb_tx;
}

static int
eth_dev_configure(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;
	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;

	internal->vlan_strip = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP);

	return 0;
}

static inline struct internal_list *
find_internal_resource(char *ifname)
{
	int found = 0;
	struct internal_list *list;
	struct pmd_internal *internal;

	if (ifname == NULL)
		return NULL;

	pthread_mutex_lock(&internal_list_lock);

	TAILQ_FOREACH(list, &internal_list, next) {
		internal = list->eth_dev->data->dev_private;
		if (!strcmp(internal->iface_name, ifname)) {
			found = 1;
			break;
		}
	}

	pthread_mutex_unlock(&internal_list_lock);

	if (!found)
		return NULL;

	return list;
}

static int
eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
{
	struct rte_vhost_vring vring;
	struct vhost_queue *vq;
	int ret = 0;

	vq = dev->data->rx_queues[qid];
	if (!vq) {
		VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
		return -1;
	}

	ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
	if (ret < 0) {
		VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
		return ret;
	}
	VHOST_LOG(INFO, "Enable interrupt for rxq%d\n", qid);
	rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 1);
	rte_wmb();

	return ret;
}

static int
eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid)
{
	struct rte_vhost_vring vring;
	struct vhost_queue *vq;
	int ret = 0;

	vq = dev->data->rx_queues[qid];
	if (!vq) {
		VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
		return -1;
	}

	ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
	if (ret < 0) {
		VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
		return ret;
	}
	VHOST_LOG(INFO, "Disable interrupt for rxq%d\n", qid);
	rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0);
	rte_wmb();

	return 0;
}

static void
eth_vhost_uninstall_intr(struct rte_eth_dev *dev)
{
	struct rte_intr_handle *intr_handle = dev->intr_handle;

	if (intr_handle) {
		if (intr_handle->intr_vec)
			free(intr_handle->intr_vec);
		free(intr_handle);
	}

	dev->intr_handle = NULL;
}

static int
eth_vhost_install_intr(struct rte_eth_dev *dev)
{
	struct rte_vhost_vring vring;
	struct vhost_queue *vq;
	int count = 0;
	int nb_rxq = dev->data->nb_rx_queues;
	int i;
	int ret;

	/* uninstall firstly if we are reconnecting */
	if (dev->intr_handle)
		eth_vhost_uninstall_intr(dev);

	dev->intr_handle = malloc(sizeof(*dev->intr_handle));
	if (!dev->intr_handle) {
		VHOST_LOG(ERR, "Failed to allocate intr_handle\n");
		return -ENOMEM;
	}
	memset(dev->intr_handle, 0, sizeof(*dev->intr_handle));

	dev->intr_handle->efd_counter_size = sizeof(uint64_t);

	dev->intr_handle->intr_vec =
		malloc(nb_rxq * sizeof(dev->intr_handle->intr_vec[0]));

	if (!dev->intr_handle->intr_vec) {
		VHOST_LOG(ERR,
			"Failed to allocate memory for interrupt vector\n");
		free(dev->intr_handle);
		return -ENOMEM;
	}

	VHOST_LOG(INFO, "Prepare intr vec\n");
	for (i = 0; i < nb_rxq; i++) {
		vq = dev->data->rx_queues[i];
		if (!vq) {
			VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i);
			continue;
		}

		ret = rte_vhost_get_vhost_vring(vq->vid, (i << 1) + 1, &vring);
		if (ret < 0) {
			VHOST_LOG(INFO,
				"Failed to get rxq-%d's vring, skip!\n", i);
			continue;
		}

		if (vring.kickfd < 0) {
			VHOST_LOG(INFO,
				"rxq-%d's kickfd is invalid, skip!\n", i);
			continue;
		}
		dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
		dev->intr_handle->efds[i] = vring.kickfd;
		count++;
		VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i);
	}

	dev->intr_handle->nb_efd = count;
	dev->intr_handle->max_intr = count + 1;
	dev->intr_handle->type = RTE_INTR_HANDLE_VDEV;

	return 0;
}

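/*
 * The vring kick eventfds double as Rx interrupt sources here: each queue's
 * kickfd is registered as an efd on the vdev interrupt handle, so an
 * application blocked in rte_epoll_wait() on this port wakes up whenever
 * the guest kicks the corresponding virtqueue.
 */
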
static void
update_queuing_status(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;
	struct vhost_queue *vq;
	unsigned int i;
	int allow_queuing = 1;

	if (!dev->data->rx_queues || !dev->data->tx_queues)
		return;

	if (rte_atomic32_read(&internal->started) == 0 ||
	    rte_atomic32_read(&internal->dev_attached) == 0)
		allow_queuing = 0;

	/* Wait until rx/tx_pkt_burst stops accessing vhost device */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		if (vq == NULL)
			continue;
		rte_atomic32_set(&vq->allow_queuing, allow_queuing);
		while (rte_atomic32_read(&vq->while_queuing))
			rte_pause();
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		if (vq == NULL)
			continue;
		rte_atomic32_set(&vq->allow_queuing, allow_queuing);
		while (rte_atomic32_read(&vq->while_queuing))
			rte_pause();
	}
}

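/*
 * This is the control-path side of the queuing handshake: flip
 * allow_queuing, then spin until while_queuing drains to zero, which
 * guarantees that no rx/tx burst is still inside the vhost library when the
 * device is started, stopped, attached or detached.
 */
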
static void
queue_setup(struct rte_eth_dev *eth_dev, struct pmd_internal *internal)
{
	struct vhost_queue *vq;
	int i;

	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
		vq = eth_dev->data->rx_queues[i];
		if (!vq)
			continue;
		vq->vid = internal->vid;
		vq->internal = internal;
		vq->port = eth_dev->data->port_id;
	}
	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
		vq = eth_dev->data->tx_queues[i];
		if (!vq)
			continue;
		vq->vid = internal->vid;
		vq->internal = internal;
		vq->port = eth_dev->data->port_id;
	}
}

static int
new_device(int vid)
{
	struct rte_eth_dev *eth_dev;
	struct internal_list *list;
	struct pmd_internal *internal;
	struct rte_eth_conf *dev_conf;
	unsigned int i;
	char ifname[PATH_MAX];
#ifdef RTE_LIBRTE_VHOST_NUMA
	int newnode;
#endif

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(INFO, "Invalid device name: %s\n", ifname);
		return -1;
	}

	eth_dev = list->eth_dev;
	internal = eth_dev->data->dev_private;
	dev_conf = &eth_dev->data->dev_conf;

#ifdef RTE_LIBRTE_VHOST_NUMA
	newnode = rte_vhost_get_numa_node(vid);
	if (newnode >= 0)
		eth_dev->data->numa_node = newnode;
#endif

	internal->vid = vid;
	if (rte_atomic32_read(&internal->started) == 1) {
		queue_setup(eth_dev, internal);

		if (dev_conf->intr_conf.rxq) {
			if (eth_vhost_install_intr(eth_dev) < 0) {
				VHOST_LOG(INFO,
					"Failed to install interrupt handler.\n");
				return -1;
			}
		}
	} else {
		VHOST_LOG(INFO, "RX/TX queues do not exist yet\n");
	}

	for (i = 0; i < rte_vhost_get_vring_num(vid); i++)
		rte_vhost_enable_guest_notification(vid, i, 0);

	rte_vhost_get_mtu(vid, &eth_dev->data->mtu);

	eth_dev->data->dev_link.link_status = ETH_LINK_UP;

	rte_atomic32_set(&internal->dev_attached, 1);
	update_queuing_status(eth_dev);

	VHOST_LOG(INFO, "Vhost device %d created\n", vid);

	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);

	return 0;
}

static void
destroy_device(int vid)
{
	struct rte_eth_dev *eth_dev;
	struct pmd_internal *internal;
	struct vhost_queue *vq;
	struct internal_list *list;
	char ifname[PATH_MAX];
	unsigned int i;
	struct rte_vhost_vring_state *state;

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
		return;
	}
	eth_dev = list->eth_dev;
	internal = eth_dev->data->dev_private;

	rte_atomic32_set(&internal->dev_attached, 0);
	update_queuing_status(eth_dev);

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;

	if (eth_dev->data->rx_queues && eth_dev->data->tx_queues) {
		for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
			vq = eth_dev->data->rx_queues[i];
			if (!vq)
				continue;
			vq->vid = -1;
		}
		for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
			vq = eth_dev->data->tx_queues[i];
			if (!vq)
				continue;
			vq->vid = -1;
		}
	}

	state = vring_states[eth_dev->data->port_id];
	rte_spinlock_lock(&state->lock);
	for (i = 0; i <= state->max_vring; i++) {
		state->cur[i] = false;
		state->seen[i] = false;
	}
	state->max_vring = 0;
	rte_spinlock_unlock(&state->lock);

	VHOST_LOG(INFO, "Vhost device %d destroyed\n", vid);
	eth_vhost_uninstall_intr(eth_dev);

	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
}

static int
vring_state_changed(int vid, uint16_t vring, int enable)
{
	struct rte_vhost_vring_state *state;
	struct rte_eth_dev *eth_dev;
	struct internal_list *list;
	char ifname[PATH_MAX];

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
		return -1;
	}

	eth_dev = list->eth_dev;
	/* won't be NULL */
	state = vring_states[eth_dev->data->port_id];
	rte_spinlock_lock(&state->lock);
	state->cur[vring] = enable;
	state->max_vring = RTE_MAX(vring, state->max_vring);
	rte_spinlock_unlock(&state->lock);

	VHOST_LOG(INFO, "vring%u is %s\n",
			vring, enable ? "enabled" : "disabled");

	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL);

	return 0;
}

static struct vhost_device_ops vhost_ops = {
	.new_device          = new_device,
	.destroy_device      = destroy_device,
	.vring_state_changed = vring_state_changed,
};

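/*
 * These callbacks are invoked from the vhost library's session threads, not
 * from the application's lcores, which is why they synchronize with the
 * datapath through the atomic queuing flags and the vring-state spinlock
 * rather than assuming single-threaded access.
 */
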
int
rte_eth_vhost_get_queue_event(uint16_t port_id,
		struct rte_eth_vhost_queue_event *event)
{
	struct rte_vhost_vring_state *state;
	unsigned int i;
	int idx;

	if (port_id >= RTE_MAX_ETHPORTS) {
		VHOST_LOG(ERR, "Invalid port id\n");
		return -1;
	}

	state = vring_states[port_id];
	if (!state) {
		VHOST_LOG(ERR, "Unused port\n");
		return -1;
	}

	rte_spinlock_lock(&state->lock);
	for (i = 0; i <= state->max_vring; i++) {
		idx = state->index++ % (state->max_vring + 1);

		if (state->cur[idx] != state->seen[idx]) {
			state->seen[idx] = state->cur[idx];
			event->queue_id = idx / 2;
			/* 0 means RX queue, 1 means TX queue */
			event->rx = idx & 1;
			event->enable = state->cur[idx];
			rte_spinlock_unlock(&state->lock);
			return 0;
		}
	}
	rte_spinlock_unlock(&state->lock);

	return -1;
}

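/*
 * A sketch of how an application might consume these events after receiving
 * an RTE_ETH_EVENT_QUEUE_STATE callback (names local to the example):
 *
 *	struct rte_eth_vhost_queue_event ev;
 *
 *	while (rte_eth_vhost_get_queue_event(port_id, &ev) == 0)
 *		printf("queue %u (%s) is now %s\n", ev.queue_id,
 *		       ev.rx ? "rx" : "tx",
 *		       ev.enable ? "enabled" : "disabled");
 *
 * Each call drains at most one pending state change, so it is polled until
 * it returns a negative value.
 */
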
int
rte_eth_vhost_get_vid_from_port_id(uint16_t port_id)
{
	struct internal_list *list;
	struct rte_eth_dev *eth_dev;
	struct vhost_queue *vq;
	int vid = -1;

	if (!rte_eth_dev_is_valid_port(port_id))
		return -1;

	pthread_mutex_lock(&internal_list_lock);

	TAILQ_FOREACH(list, &internal_list, next) {
		eth_dev = list->eth_dev;
		if (eth_dev->data->port_id == port_id) {
			vq = eth_dev->data->rx_queues[0];
			if (vq)
				vid = vq->vid;
			break;
		}
	}

	pthread_mutex_unlock(&internal_list_lock);

	return vid;
}

static int
eth_dev_start(struct rte_eth_dev *eth_dev)
{
	struct pmd_internal *internal = eth_dev->data->dev_private;
	struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;

	queue_setup(eth_dev, internal);

	if (rte_atomic32_read(&internal->dev_attached) == 1) {
		if (dev_conf->intr_conf.rxq) {
			if (eth_vhost_install_intr(eth_dev) < 0) {
				VHOST_LOG(INFO,
					"Failed to install interrupt handler.\n");
				return -1;
			}
		}
	}

	rte_atomic32_set(&internal->started, 1);
	update_queuing_status(eth_dev);

	return 0;
}

static void
eth_dev_stop(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;

	rte_atomic32_set(&internal->started, 0);
	update_queuing_status(dev);
}

static void
eth_dev_close(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal;
	struct internal_list *list;
	unsigned int i;

	internal = dev->data->dev_private;
	if (!internal)
		return;

	eth_dev_stop(dev);

	rte_vhost_driver_unregister(internal->iface_name);

	list = find_internal_resource(internal->iface_name);
	if (!list)
		return;

	pthread_mutex_lock(&internal_list_lock);
	TAILQ_REMOVE(&internal_list, list, next);
	pthread_mutex_unlock(&internal_list_lock);
	rte_free(list);

	if (dev->data->rx_queues)
		for (i = 0; i < dev->data->nb_rx_queues; i++)
			rte_free(dev->data->rx_queues[i]);

	if (dev->data->tx_queues)
		for (i = 0; i < dev->data->nb_tx_queues; i++)
			rte_free(dev->data->tx_queues[i]);

	free(internal->dev_name);
	free(internal->iface_name);
	rte_free(internal);

	dev->data->dev_private = NULL;
}

static int
eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		   uint16_t nb_rx_desc __rte_unused,
		   unsigned int socket_id,
		   const struct rte_eth_rxconf *rx_conf __rte_unused,
		   struct rte_mempool *mb_pool)
{
	struct vhost_queue *vq;

	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
			RTE_CACHE_LINE_SIZE, socket_id);
	if (vq == NULL) {
		VHOST_LOG(ERR, "Failed to allocate memory for rx queue\n");
		return -ENOMEM;
	}

	vq->mb_pool = mb_pool;
	vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
	dev->data->rx_queues[rx_queue_id] = vq;

	return 0;
}

static int
eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		   uint16_t nb_tx_desc __rte_unused,
		   unsigned int socket_id,
		   const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct vhost_queue *vq;

	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
			RTE_CACHE_LINE_SIZE, socket_id);
	if (vq == NULL) {
		VHOST_LOG(ERR, "Failed to allocate memory for tx queue\n");
		return -ENOMEM;
	}

	vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
	dev->data->tx_queues[tx_queue_id] = vq;

	return 0;
}

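/*
 * Virtqueue numbering: queue pair i maps to guest virtqueues (i * 2) and
 * (i * 2 + 1). A PMD rx queue reads what the guest transmits, so it binds
 * to the guest TX ring (i * VIRTIO_QNUM + VIRTIO_TXQ); a PMD tx queue
 * writes into the guest RX ring (i * VIRTIO_QNUM + VIRTIO_RXQ). The same
 * (qid << 1) + 1 arithmetic appears in the interrupt helpers above.
 */
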
static void
eth_dev_info(struct rte_eth_dev *dev,
	     struct rte_eth_dev_info *dev_info)
{
	struct pmd_internal *internal;

	internal = dev->data->dev_private;
	if (internal == NULL) {
		VHOST_LOG(ERR, "Invalid device specified\n");
		return;
	}

	dev_info->max_mac_addrs = 1;
	dev_info->max_rx_pktlen = (uint32_t)-1;
	dev_info->max_rx_queues = internal->max_queues;
	dev_info->max_tx_queues = internal->max_queues;
	dev_info->min_rx_bufsize = 0;

	dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
				DEV_TX_OFFLOAD_VLAN_INSERT;
	dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
}

static int
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	unsigned int i;
	unsigned long rx_total = 0, tx_total = 0, tx_missed_total = 0;
	unsigned long rx_total_bytes = 0, tx_total_bytes = 0;
	struct vhost_queue *vq;

	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
			i < dev->data->nb_rx_queues; i++) {
		if (dev->data->rx_queues[i] == NULL)
			continue;
		vq = dev->data->rx_queues[i];
		stats->q_ipackets[i] = vq->stats.pkts;
		rx_total += stats->q_ipackets[i];

		stats->q_ibytes[i] = vq->stats.bytes;
		rx_total_bytes += stats->q_ibytes[i];
	}

	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
			i < dev->data->nb_tx_queues; i++) {
		if (dev->data->tx_queues[i] == NULL)
			continue;
		vq = dev->data->tx_queues[i];
		stats->q_opackets[i] = vq->stats.pkts;
		tx_missed_total += vq->stats.missed_pkts;
		tx_total += stats->q_opackets[i];

		stats->q_obytes[i] = vq->stats.bytes;
		tx_total_bytes += stats->q_obytes[i];
	}

	stats->ipackets = rx_total;
	stats->opackets = tx_total;
	stats->oerrors = tx_missed_total;
	stats->ibytes = rx_total_bytes;
	stats->obytes = tx_total_bytes;

	return 0;
}

static void
eth_stats_reset(struct rte_eth_dev *dev)
{
	struct vhost_queue *vq;
	unsigned int i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		if (dev->data->rx_queues[i] == NULL)
			continue;
		vq = dev->data->rx_queues[i];
		vq->stats.pkts = 0;
		vq->stats.bytes = 0;
	}
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		if (dev->data->tx_queues[i] == NULL)
			continue;
		vq = dev->data->tx_queues[i];
		vq->stats.pkts = 0;
		vq->stats.bytes = 0;
		vq->stats.missed_pkts = 0;
	}
}

static void
eth_queue_release(void *q)
{
	rte_free(q);
}

static int
eth_tx_done_cleanup(void *txq __rte_unused, uint32_t free_cnt __rte_unused)
{
	/*
	 * vHost does not hang onto mbufs: eth_vhost_tx() copies packet data
	 * into the virtqueue and frees the mbufs itself, so there is nothing
	 * to clean up.
	 */
	return 0;
}

static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
		int wait_to_complete __rte_unused)
{
	return 0;
}

static uint32_t
eth_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct vhost_queue *vq;

	vq = dev->data->rx_queues[rx_queue_id];
	if (vq == NULL)
		return 0;

	return rte_vhost_rx_queue_count(vq->vid, vq->virtqueue_id);
}

static const struct eth_dev_ops ops = {
	.dev_start = eth_dev_start,
	.dev_stop = eth_dev_stop,
	.dev_close = eth_dev_close,
	.dev_configure = eth_dev_configure,
	.dev_infos_get = eth_dev_info,
	.rx_queue_setup = eth_rx_queue_setup,
	.tx_queue_setup = eth_tx_queue_setup,
	.rx_queue_release = eth_queue_release,
	.tx_queue_release = eth_queue_release,
	.tx_done_cleanup = eth_tx_done_cleanup,
	.rx_queue_count = eth_rx_queue_count,
	.link_update = eth_link_update,
	.stats_get = eth_stats_get,
	.stats_reset = eth_stats_reset,
	.xstats_reset = vhost_dev_xstats_reset,
	.xstats_get = vhost_dev_xstats_get,
	.xstats_get_names = vhost_dev_xstats_get_names,
	.rx_queue_intr_enable = eth_rxq_intr_enable,
	.rx_queue_intr_disable = eth_rxq_intr_disable,
};

static struct rte_vdev_driver pmd_vhost_drv;

static int
eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
	int16_t queues, const unsigned int numa_node, uint64_t flags)
{
	const char *name = rte_vdev_device_name(dev);
	struct rte_eth_dev_data *data;
	struct pmd_internal *internal = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	struct ether_addr *eth_addr = NULL;
	struct rte_vhost_vring_state *vring_state = NULL;
	struct internal_list *list = NULL;

	VHOST_LOG(INFO, "Creating VHOST-USER backend on numa socket %u\n",
		numa_node);

	list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
	if (list == NULL)
		goto error;

	/* reserve an ethdev entry */
	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internal));
	if (eth_dev == NULL)
		goto error;
	data = eth_dev->data;

	eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node);
	if (eth_addr == NULL)
		goto error;
	data->mac_addrs = eth_addr;
	*eth_addr = base_eth_addr;
	eth_addr->addr_bytes[5] = eth_dev->data->port_id;

	vring_state = rte_zmalloc_socket(name,
			sizeof(*vring_state), 0, numa_node);
	if (vring_state == NULL)
		goto error;

	/* now put it all together
	 * - store queue data in internal,
	 * - point eth_dev_data to internals
	 * - and point eth_dev structure to new eth_dev_data structure
	 */
	internal = eth_dev->data->dev_private;
	internal->dev_name = strdup(name);
	if (internal->dev_name == NULL)
		goto error;
	internal->iface_name = strdup(iface_name);
	if (internal->iface_name == NULL)
		goto error;

	list->eth_dev = eth_dev;
	pthread_mutex_lock(&internal_list_lock);
	TAILQ_INSERT_TAIL(&internal_list, list, next);
	pthread_mutex_unlock(&internal_list_lock);

	rte_spinlock_init(&vring_state->lock);
	vring_states[eth_dev->data->port_id] = vring_state;

	data->nb_rx_queues = queues;
	data->nb_tx_queues = queues;
	internal->max_queues = queues;
	internal->vid = -1;
	data->dev_link = pmd_link;
	data->dev_flags = RTE_ETH_DEV_INTR_LSC;

	eth_dev->dev_ops = &ops;

	/* finally assign rx and tx ops */
	eth_dev->rx_pkt_burst = eth_vhost_rx;
	eth_dev->tx_pkt_burst = eth_vhost_tx;

	if (rte_vhost_driver_register(iface_name, flags))
		goto error;

	if (rte_vhost_driver_callback_register(iface_name, &vhost_ops) < 0) {
		VHOST_LOG(ERR, "Can't register callbacks\n");
		goto error;
	}

	if (rte_vhost_driver_start(iface_name) < 0) {
		VHOST_LOG(ERR, "Failed to start driver for %s\n",
			iface_name);
		goto error;
	}

	rte_eth_dev_probing_finish(eth_dev);
	return data->port_id;

error:
	if (internal) {
		free(internal->iface_name);
		free(internal->dev_name);
	}
	rte_free(vring_state);
	rte_eth_dev_release_port(eth_dev);
	rte_free(list);

	return -1;
}

static inline int
open_iface(const char *key __rte_unused, const char *value, void *extra_args)
{
	const char **iface_name = extra_args;

	if (value == NULL)
		return -1;

	*iface_name = value;

	return 0;
}

static inline int
open_int(const char *key __rte_unused, const char *value, void *extra_args)
{
	uint16_t *n = extra_args;

	if (value == NULL || extra_args == NULL)
		return -EINVAL;

	*n = (uint16_t)strtoul(value, NULL, 0);
	if (*n == USHRT_MAX && errno == ERANGE)
		return -1;

	return 0;
}

static int
rte_pmd_vhost_probe(struct rte_vdev_device *dev)
{
	struct rte_kvargs *kvlist = NULL;
	int ret = 0;
	char *iface_name;
	uint16_t queues;
	uint64_t flags = 0;
	int client_mode = 0;
	int dequeue_zero_copy = 0;
	int iommu_support = 0;
	int postcopy_support = 0;
	struct rte_eth_dev *eth_dev;
	const char *name = rte_vdev_device_name(dev);

	VHOST_LOG(INFO, "Initializing pmd_vhost for %s\n", name);

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			VHOST_LOG(ERR, "Failed to probe %s\n", name);
			return -1;
		}
		/* TODO: request info from primary to set up Rx and Tx */
		eth_dev->dev_ops = &ops;
		eth_dev->device = &dev->device;
		rte_eth_dev_probing_finish(eth_dev);
		return 0;
	}

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
	if (kvlist == NULL)
		return -1;

	if (rte_kvargs_count(kvlist, ETH_VHOST_IFACE_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_IFACE_ARG,
					 &open_iface, &iface_name);
		if (ret < 0)
			goto out_free;
	} else {
		ret = -1;
		goto out_free;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG,
					 &open_int, &queues);
		if (ret < 0 || queues > RTE_MAX_QUEUES_PER_PORT)
			goto out_free;
	} else {
		queues = 1;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_CLIENT_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_CLIENT_ARG,
					 &open_int, &client_mode);
		if (ret < 0)
			goto out_free;

		if (client_mode)
			flags |= RTE_VHOST_USER_CLIENT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY,
					 &open_int, &dequeue_zero_copy);
		if (ret < 0)
			goto out_free;

		if (dequeue_zero_copy)
			flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_IOMMU_SUPPORT) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_IOMMU_SUPPORT,
					 &open_int, &iommu_support);
		if (ret < 0)
			goto out_free;

		if (iommu_support)
			flags |= RTE_VHOST_USER_IOMMU_SUPPORT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_POSTCOPY_SUPPORT) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_POSTCOPY_SUPPORT,
					 &open_int, &postcopy_support);
		if (ret < 0)
			goto out_free;

		if (postcopy_support)
			flags |= RTE_VHOST_USER_POSTCOPY_SUPPORT;
	}

	if (dev->device.numa_node == SOCKET_ID_ANY)
		dev->device.numa_node = rte_socket_id();

	eth_dev_vhost_create(dev, iface_name, queues, dev->device.numa_node,
		flags);

out_free:
	rte_kvargs_free(kvlist);
	return ret;
}

static int
rte_pmd_vhost_remove(struct rte_vdev_device *dev)
{
	const char *name;
	struct rte_eth_dev *eth_dev = NULL;

	name = rte_vdev_device_name(dev);
	VHOST_LOG(INFO, "Un-Initializing pmd_vhost for %s\n", name);

	/* find an ethdev entry */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return -ENODEV;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return rte_eth_dev_release_port(eth_dev);

	eth_dev_close(eth_dev);

	rte_free(vring_states[eth_dev->data->port_id]);
	vring_states[eth_dev->data->port_id] = NULL;

	rte_eth_dev_release_port(eth_dev);

	return 0;
}

static struct rte_vdev_driver pmd_vhost_drv = {
	.probe = rte_pmd_vhost_probe,
	.remove = rte_pmd_vhost_remove,
};

RTE_PMD_REGISTER_VDEV(net_vhost, pmd_vhost_drv);
RTE_PMD_REGISTER_ALIAS(net_vhost, eth_vhost);
RTE_PMD_REGISTER_PARAM_STRING(net_vhost,
	"iface=<ifc> "
	"queues=<int> "
	"client=<0|1> "
	"dequeue-zero-copy=<0|1> "
	"iommu-support=<0|1> "
	"postcopy-support=<0|1>");

RTE_INIT(vhost_init_log)
{
	vhost_logtype = rte_log_register("pmd.net.vhost");
	if (vhost_logtype >= 0)
		rte_log_set_level(vhost_logtype, RTE_LOG_NOTICE);
}