/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#ifndef _VIRTQUEUE_H_
#define _VIRTQUEUE_H_

#include <stdint.h>
#include <rte_atomic.h>
#include <rte_memory.h>
#include <rte_mempool.h>

#include "virtio_pci.h"
#include "virtio_ring.h"
#include "virtio_logs.h"
#include "virtio_rxtx.h"
/*
 * Per virtio_ring.h in Linux.
 *     For virtio_pci on SMP, we don't need to order with respect to MMIO
 *     accesses through relaxed memory I/O windows, so smp_mb() et al are
 *     sufficient.
 *
 *     For using virtio to talk to real devices (eg. vDPA) we do need real
 *     barriers.
 */
static inline void
virtio_mb(uint8_t weak_barriers)
{
	if (weak_barriers)
		rte_smp_mb();
	else
		rte_mb();
}

static inline void
virtio_rmb(uint8_t weak_barriers)
{
	if (weak_barriers)
		rte_smp_rmb();
	else
		rte_cio_rmb();
}

static inline void
virtio_wmb(uint8_t weak_barriers)
{
	if (weak_barriers)
		rte_smp_wmb();
	else
		rte_cio_wmb();
}
#ifdef RTE_PMD_PACKET_PREFETCH
#define rte_packet_prefetch(p)  rte_prefetch1(p)
#else
#define rte_packet_prefetch(p)  do {} while(0)
#endif
#define VIRTQUEUE_MAX_NAME_SZ 32
#ifdef RTE_VIRTIO_USER
/**
 * Return the physical address (or virtual address in case of
 * virtio-user) of mbuf data buffer.
 *
 * The address is first cast to the word size (sizeof(uintptr_t))
 * before being cast to uint64_t. This makes it work with different
 * combinations of word size (64-bit and 32-bit) and virtio device
 * (virtio-pci and virtio-user).
 */
#define VIRTIO_MBUF_ADDR(mb, vq) \
	((uint64_t)(*(uintptr_t *)((uintptr_t)(mb) + (vq)->offset)))
#else
#define VIRTIO_MBUF_ADDR(mb, vq) ((mb)->buf_iova)
#endif
/**
 * Return the physical address (or virtual address in case of
 * virtio-user) of mbuf data buffer, taking care of mbuf data offset
 */
#define VIRTIO_MBUF_DATA_DMA_ADDR(mb, vq) \
	(VIRTIO_MBUF_ADDR(mb, vq) + (mb)->data_off)
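
/*
 * Illustrative sketch (not part of the original header): how an RX/TX path
 * could use these macros when filling a split-ring descriptor. The names
 * `dp` (a struct vring_desc pointer) and `cookie` (a struct rte_mbuf
 * pointer) are hypothetical locals, shown only to clarify the macros.
 *
 *	dp->addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
 *	dp->len   = cookie->data_len;
 *	dp->flags = VRING_DESC_F_WRITE;   // device-writable, i.e. an RX buffer
 */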
#define VTNET_SQ_RQ_QUEUE_IDX 0
#define VTNET_SQ_TQ_QUEUE_IDX 1
#define VTNET_SQ_CQ_QUEUE_IDX 2

enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 };
/**
 * The maximum virtqueue size is 2^15. Use that value as the end of
 * descriptor chain terminator since it will never be a valid index
 * in the descriptor table. This is used to verify we are correctly
 * handling vq_free_cnt.
 */
#define VQ_RING_DESC_CHAIN_END 32768
/**
 * Control the RX mode, ie. promiscuous, allmulti, etc...
 * All commands require an "out" sg entry containing a 1 byte
 * state value, zero = disable, non-zero = enable.  Commands
 * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
 * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
 */
#define VIRTIO_NET_CTRL_RX              0
#define VIRTIO_NET_CTRL_RX_PROMISC      0
#define VIRTIO_NET_CTRL_RX_ALLMULTI     1
#define VIRTIO_NET_CTRL_RX_ALLUNI       2
#define VIRTIO_NET_CTRL_RX_NOMULTI      3
#define VIRTIO_NET_CTRL_RX_NOUNI        4
#define VIRTIO_NET_CTRL_RX_NOBCAST      5
/**
 * Control the MAC
 *
 * The MAC filter table is managed by the hypervisor, the guest should
 * assume the size is infinite.  Filtering should be considered
 * non-perfect, ie. based on hypervisor resources, the guest may
 * receive packets from sources not specified in the filter list.
 *
 * In addition to the class/cmd header, the TABLE_SET command requires
 * two out scatterlists.  Each contains a 4 byte count of entries followed
 * by a concatenated byte stream of the ETH_ALEN MAC addresses.  The
 * first sg list contains unicast addresses, the second is for multicast.
 * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
 * is available.
 *
 * The ADDR_SET command requires one out scatterlist containing a
 * 6-byte MAC address. This functionality is present if the
 * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
 */
struct virtio_net_ctrl_mac {
	uint32_t entries;
	uint8_t macs[][ETHER_ADDR_LEN];
} __attribute__((__packed__));
#define VIRTIO_NET_CTRL_MAC              1
#define VIRTIO_NET_CTRL_MAC_TABLE_SET    0
#define VIRTIO_NET_CTRL_MAC_ADDR_SET     1
/**
 * Control VLAN filtering
 *
 * The VLAN filter table is controlled via a simple ADD/DEL interface.
 * VLAN IDs not added may be filtered by the hypervisor.  Del is the
 * opposite of add.  Both commands expect an out entry containing a 2
 * byte VLAN ID.  VLAN filtering is available with the
 * VIRTIO_NET_F_CTRL_VLAN feature bit.
 */
#define VIRTIO_NET_CTRL_VLAN     2
#define VIRTIO_NET_CTRL_VLAN_ADD 0
#define VIRTIO_NET_CTRL_VLAN_DEL 1
/*
 * Control link announce acknowledgement
 *
 * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
 * the driver has received the notification; the device will clear the
 * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
 * this command.
 */
#define VIRTIO_NET_CTRL_ANNOUNCE     3
#define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0
struct virtio_net_ctrl_hdr {
	uint8_t class;
	uint8_t cmd;
} __attribute__((packed));
typedef uint8_t virtio_net_ctrl_ack;

#define VIRTIO_NET_OK     0
#define VIRTIO_NET_ERR    1

#define VIRTIO_MAX_CTRL_DATA 2048
struct virtio_pmd_ctrl {
	struct virtio_net_ctrl_hdr hdr;
	virtio_net_ctrl_ack status;
	uint8_t data[VIRTIO_MAX_CTRL_DATA];
};
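
/*
 * Illustrative sketch (not part of the original header): how a control
 * command such as "enable promiscuous mode" could be laid out in a
 * struct virtio_pmd_ctrl before being sent over the control queue. The
 * locals `ctrl` and `on` are hypothetical; the send path itself (e.g.
 * the PMD's virtio_send_command() helper in virtio_ethdev.c) lives
 * outside this header.
 *
 *	struct virtio_pmd_ctrl ctrl;
 *	uint8_t on = 1;                         // non-zero = enable
 *
 *	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
 *	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
 *	memcpy(ctrl.data, &on, sizeof(on));     // 1-byte "out" state value
 *	ctrl.status = VIRTIO_NET_ERR;           // device rewrites to VIRTIO_NET_OK
 */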
struct vq_desc_extra {
	void *cookie;
	uint16_t ndescs;
	uint16_t next;
};
struct virtqueue {
	struct virtio_hw  *hw; /**< virtio_hw structure pointer. */
	union {
		struct {
			/**< vring keeping desc, used and avail */
			struct vring ring;
		} vq_split;

		struct {
			/**< vring keeping descs and events */
			struct vring_packed ring;
			bool used_wrap_counter;
			uint16_t cached_flags; /**< cached flags for descs */
			uint16_t event_flags_shadow;
		} vq_packed;
	};

	uint16_t vq_used_cons_idx; /**< last consumed descriptor */
	uint16_t vq_nentries;  /**< vring desc numbers */
	uint16_t vq_free_cnt;  /**< num of desc available */
	uint16_t vq_avail_idx; /**< sync until needed */
	uint16_t vq_free_thresh; /**< free threshold */

	void *vq_ring_virt_mem;  /**< linear address of vring*/
	unsigned int vq_ring_size;

	union {
		struct virtnet_rx rxq;
		struct virtnet_tx txq;
		struct virtnet_ctl cq;
	};

	rte_iova_t vq_ring_mem; /**< physical address of vring,
				 * or virtual address for virtio_user. */

	/**
	 * Head of the free chain in the descriptor table. If
	 * there are no free descriptors, this will be set to
	 * VQ_RING_DESC_CHAIN_END.
	 */
	uint16_t vq_desc_head_idx;
	uint16_t vq_desc_tail_idx;
	uint16_t vq_queue_index;   /**< PCI queue index */
	uint16_t offset; /**< relative offset to obtain addr in mbuf */
	uint16_t *notify_addr;
	struct rte_mbuf **sw_ring;  /**< RX software ring. */
	struct vq_desc_extra vq_descx[0];
};
/* If multiqueue is provided by host, then we support it. */
#define VIRTIO_NET_CTRL_MQ                    4
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET       0
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN       1
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX       0x8000
/**
 * This is the first element of the scatter-gather list.  If you don't
 * specify GSO or CSUM features, you can simply ignore the header.
 */
struct virtio_net_hdr {
#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1    /**< Use csum_start,csum_offset*/
#define VIRTIO_NET_HDR_F_DATA_VALID 2    /**< Checksum is valid */
	uint8_t flags;
#define VIRTIO_NET_HDR_GSO_NONE     0    /**< Not a GSO frame */
#define VIRTIO_NET_HDR_GSO_TCPV4    1    /**< GSO frame, IPv4 TCP (TSO) */
#define VIRTIO_NET_HDR_GSO_UDP      3    /**< GSO frame, IPv4 UDP (UFO) */
#define VIRTIO_NET_HDR_GSO_TCPV6    4    /**< GSO frame, IPv6 TCP */
#define VIRTIO_NET_HDR_GSO_ECN      0x80 /**< TCP has ECN set */
	uint8_t gso_type;
	uint16_t hdr_len;     /**< Ethernet + IP + tcp/udp hdrs */
	uint16_t gso_size;    /**< Bytes to append to hdr_len per frame */
	uint16_t csum_start;  /**< Position to start checksumming from */
	uint16_t csum_offset; /**< Offset after that to place checksum */
};
/**
 * This is the version of the header to use when the MRG_RXBUF
 * feature has been negotiated.
 */
struct virtio_net_hdr_mrg_rxbuf {
	struct virtio_net_hdr hdr;
	uint16_t num_buffers; /**< Number of merged rx buffers */
};
/* Region reserved to allow for transmit header and indirect ring */
#define VIRTIO_MAX_TX_INDIRECT 8
struct virtio_tx_region {
	struct virtio_net_hdr_mrg_rxbuf tx_hdr;
	struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT]
		__attribute__((__aligned__(16)));
};
static inline int
desc_is_used(struct vring_packed_desc *desc, struct virtqueue *vq)
{
	uint16_t used, avail, flags;

	flags = desc->flags;
	used = !!(flags & VRING_PACKED_DESC_F_USED);
	avail = !!(flags & VRING_PACKED_DESC_F_AVAIL);

	return avail == used && used == vq->vq_packed.used_wrap_counter;
}
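
/*
 * Illustrative sketch (not part of the original header): a packed-ring
 * completion path would typically poll the next descriptor with
 * desc_is_used() before touching its payload, e.g.:
 *
 *	uint16_t idx = vq->vq_used_cons_idx;
 *	struct vring_packed_desc *desc = &vq->vq_packed.ring.desc[idx];
 *
 *	if (desc_is_used(desc, vq)) {
 *		virtio_rmb(vq->hw->weak_barriers);   // read payload only after flags
 *		// ... process vq->vq_descx[desc->id].cookie, advance
 *		//     vq_used_cons_idx and flip used_wrap_counter on wrap ...
 *	}
 */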
static inline void
vring_desc_init_packed(struct virtqueue *vq, int n)
{
	int i;

	for (i = 0; i < n - 1; i++) {
		vq->vq_packed.ring.desc[i].id = i;
		vq->vq_descx[i].next = i + 1;
	}
	vq->vq_packed.ring.desc[i].id = i;
	vq->vq_descx[i].next = VQ_RING_DESC_CHAIN_END;
}
/* Chain all the descriptors in the ring with an END */
static inline void
vring_desc_init_split(struct vring_desc *dp, uint16_t n)
{
	uint16_t i;

	for (i = 0; i < n - 1; i++)
		dp[i].next = (uint16_t)(i + 1);
	dp[i].next = VQ_RING_DESC_CHAIN_END;
}
/**
 * Tell the backend not to interrupt us. Implementation for packed virtqueues.
 */
static inline void
virtqueue_disable_intr_packed(struct virtqueue *vq)
{
	if (vq->vq_packed.event_flags_shadow != RING_EVENT_FLAGS_DISABLE) {
		vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_DISABLE;
		vq->vq_packed.ring.driver->desc_event_flags =
			vq->vq_packed.event_flags_shadow;
	}
}
/**
 * Tell the backend not to interrupt us. Implementation for split virtqueues.
 */
static inline void
virtqueue_disable_intr_split(struct virtqueue *vq)
{
	vq->vq_split.ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
}
/**
 * Tell the backend not to interrupt us.
 */
static inline void
virtqueue_disable_intr(struct virtqueue *vq)
{
	if (vtpci_packed_queue(vq->hw))
		virtqueue_disable_intr_packed(vq);
	else
		virtqueue_disable_intr_split(vq);
}
/**
 * Tell the backend to interrupt. Implementation for packed virtqueues.
 */
static inline void
virtqueue_enable_intr_packed(struct virtqueue *vq)
{
	if (vq->vq_packed.event_flags_shadow == RING_EVENT_FLAGS_DISABLE) {
		vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_ENABLE;
		vq->vq_packed.ring.driver->desc_event_flags =
			vq->vq_packed.event_flags_shadow;
	}
}
/**
 * Tell the backend to interrupt. Implementation for split virtqueues.
 */
static inline void
virtqueue_enable_intr_split(struct virtqueue *vq)
{
	vq->vq_split.ring.avail->flags &= (~VRING_AVAIL_F_NO_INTERRUPT);
}
/**
 * Tell the backend to interrupt us.
 */
static inline void
virtqueue_enable_intr(struct virtqueue *vq)
{
	if (vtpci_packed_queue(vq->hw))
		virtqueue_enable_intr_packed(vq);
	else
		virtqueue_enable_intr_split(vq);
}
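
/*
 * Illustrative sketch (not part of the original header): an interrupt-driven
 * RX path would typically keep interrupts off while draining the queue and
 * only re-enable them once it runs dry, e.g.:
 *
 *	virtqueue_disable_intr(vq);
 *	// ... dequeue packets until the ring is empty ...
 *	virtqueue_enable_intr(vq);
 *	// re-check for packets that raced with the enable before sleeping
 */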
/**
 * Dump virtqueue internal structures, for debug purpose only.
 */
void virtqueue_dump(struct virtqueue *vq);
/**
 * Get all mbufs to be freed.
 */
struct rte_mbuf *virtqueue_detach_unused(struct virtqueue *vq);
/* Flush the elements in the used ring. */
void virtqueue_rxvq_flush(struct virtqueue *vq);
static inline int
virtqueue_full(const struct virtqueue *vq)
{
	return vq->vq_free_cnt == 0;
}
static inline int
virtio_get_queue_type(struct virtio_hw *hw, uint16_t vtpci_queue_idx)
{
	if (vtpci_queue_idx == hw->max_queue_pairs * 2)
		return VTNET_CQ;
	else if (vtpci_queue_idx % 2 == 0)
		return VTNET_RQ;
	else
		return VTNET_TQ;
}
#define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_split.ring.used->idx - \
					(vq)->vq_used_cons_idx))
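
/*
 * Illustrative sketch (not part of the original header): a split-ring poll
 * loop would typically pair VIRTQUEUE_NUSED() with virtio_rmb() so that
 * used-ring entries are only read after the used index, e.g.:
 *
 *	uint16_t nb_used = VIRTQUEUE_NUSED(vq);
 *
 *	virtio_rmb(vq->hw->weak_barriers);
 *	while (nb_used--) {
 *		// consume vq->vq_split.ring.used->ring[vq->vq_used_cons_idx
 *		//         & (vq->vq_nentries - 1)] and advance vq_used_cons_idx
 *	}
 */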
void vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx);
void vq_ring_free_chain_packed(struct virtqueue *vq, uint16_t used_idx);
void vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx,
			  uint16_t num);
static inline void
vq_update_avail_idx(struct virtqueue *vq)
{
	virtio_wmb(vq->hw->weak_barriers);
	vq->vq_split.ring.avail->idx = vq->vq_avail_idx;
}
static inline void
vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx)
{
	uint16_t avail_idx;

	/*
	 * Place the head of the descriptor chain into the next slot and make
	 * it usable to the host. The chain is made available now rather than
	 * deferring to virtqueue_notify() in the hopes that if the host is
	 * currently running on another CPU, we can keep it processing the new
	 * descriptor.
	 */
	avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1));
	if (unlikely(vq->vq_split.ring.avail->ring[avail_idx] != desc_idx))
		vq->vq_split.ring.avail->ring[avail_idx] = desc_idx;
	vq->vq_avail_idx++;
}
static inline int
virtqueue_kick_prepare(struct virtqueue *vq)
{
	/*
	 * Ensure updated avail->idx is visible to vhost before reading
	 * the used->flags.
	 */
	virtio_mb(vq->hw->weak_barriers);
	return !(vq->vq_split.ring.used->flags & VRING_USED_F_NO_NOTIFY);
}
static inline int
virtqueue_kick_prepare_packed(struct virtqueue *vq)
{
	uint16_t flags;

	/*
	 * Ensure updated data is visible to vhost before reading the flags.
	 */
	virtio_mb(vq->hw->weak_barriers);
	flags = vq->vq_packed.ring.device->desc_event_flags;

	return flags != RING_EVENT_FLAGS_DISABLE;
}
/*
 * virtqueue_kick_prepare*() or the virtio_wmb() should be called
 * before this function to be sure that all the data is visible to vhost.
 */
static inline void
virtqueue_notify(struct virtqueue *vq)
{
	VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq);
}
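
/*
 * Illustrative sketch (not part of the original header): the usual split-ring
 * enqueue/notify sequence built from the helpers above, e.g. after chaining
 * the descriptors of one packet whose head index is `head_idx` (hypothetical):
 *
 *	vq_update_avail_ring(vq, head_idx);
 *	vq_update_avail_idx(vq);            // publishes avail->idx (with wmb)
 *	if (virtqueue_kick_prepare(vq))     // full barrier, then check NO_NOTIFY
 *		virtqueue_notify(vq);
 */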
#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTQUEUE_DUMP(vq) do { \
	uint16_t used_idx, nused; \
	used_idx = (vq)->vq_split.ring.used->idx; \
	nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \
	if (vtpci_packed_queue((vq)->hw)) { \
		PMD_INIT_LOG(DEBUG, \
		"VQ: - size=%d; free=%d; used_cons_idx=%d; avail_idx=%d;" \
		" cached_flags=0x%x; used_wrap_counter=%d", \
		(vq)->vq_nentries, (vq)->vq_free_cnt, (vq)->vq_used_cons_idx, \
		(vq)->vq_avail_idx, (vq)->vq_packed.cached_flags, \
		(vq)->vq_packed.used_wrap_counter); \
		break; \
	} \
	PMD_INIT_LOG(DEBUG, \
	  "VQ: - size=%d; free=%d; used=%d; desc_head_idx=%d;" \
	  " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \
	  " avail.flags=0x%x; used.flags=0x%x", \
	  (vq)->vq_nentries, (vq)->vq_free_cnt, nused, \
	  (vq)->vq_desc_head_idx, (vq)->vq_split.ring.avail->idx, \
	  (vq)->vq_used_cons_idx, (vq)->vq_split.ring.used->idx, \
	  (vq)->vq_split.ring.avail->flags, (vq)->vq_split.ring.used->flags); \
} while (0)
#else
#define VIRTQUEUE_DUMP(vq) do { } while (0)
#endif

#endif /* _VIRTQUEUE_H_ */