/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>
#include <rte_cpuflags.h>
#include <rte_net.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio_pci.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"
#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
#else
#define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
#endif
#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
	ETH_TXQ_FLAGS_NOOFFLOADS)
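
/* Report whether at least 'offset' used Rx descriptors are pending on the queue. */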
int
virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
{
	struct virtnet_rx *rxvq = rxq;
	struct virtqueue *vq = rxvq->vq;

	return VIRTQUEUE_NUSED(vq) >= offset;
}
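
/*
 * Give the descriptor chain starting at desc_idx back to the free list and
 * credit the freed entries to vq_free_cnt.
 */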
void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp, *dp_tail;
	struct vq_desc_extra *dxp;
	uint16_t desc_idx_last = desc_idx;

	dp = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
		while (dp->flags & VRING_DESC_F_NEXT) {
			desc_idx_last = dp->next;
			dp = &vq->vq_ring.desc[dp->next];
		}
	}
	dxp->ndescs = 0;

	/*
	 * We must append the existing free chain, if any, to the end of the
	 * newly freed chain. If the virtqueue was completely used, then
	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
	 */
	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
		vq->vq_desc_head_idx = desc_idx;
	} else {
		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
		dp_tail->next = desc_idx;
	}

	vq->vq_desc_tail_idx = desc_idx_last;
	dp->next = VQ_RING_DESC_CHAIN_END;
}
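
/*
 * Dequeue up to 'num' used descriptors: return the mbuf cookies in rx_pkts,
 * the buffer lengths in len[], and put the descriptors back on the free
 * chain. Returns how many entries were actually dequeued.
 */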
static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
			   uint32_t *len, uint16_t num)
{
	struct vring_used_elem *uep;
	struct rte_mbuf *cookie;
	uint16_t used_idx, desc_idx;
	uint16_t i;

	/* Caller does the check */
	for (i = 0; i < num; i++) {
		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];
		desc_idx = (uint16_t) uep->id;
		len[i] = uep->len;
		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

		if (unlikely(cookie == NULL)) {
			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
				vq->vq_used_cons_idx);
			break;
		}

		rte_prefetch0(cookie);
		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
		rx_pkts[i] = cookie;
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);
		vq->vq_descx[desc_idx].cookie = NULL;
	}

	return i;
}
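
/* Fallback Tx free threshold, used when tx_conf->tx_free_thresh is zero. */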
#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif
/* Cleanup from completed transmits. */
static void
virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
{
	uint16_t i, used_idx, desc_idx;

	for (i = 0; i < num; i++) {
		struct vring_used_elem *uep;
		struct vq_desc_extra *dxp;

		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];

		desc_idx = (uint16_t) uep->id;
		dxp = &vq->vq_descx[desc_idx];
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);

		if (dxp->cookie != NULL) {
			rte_pktmbuf_free(dxp->cookie);
			dxp->cookie = NULL;
		}
	}
}
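
/*
 * Post one mbuf as a device-writable receive buffer, reserving room for the
 * virtio-net header in front of the packet data.
 */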
static inline int
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
{
	struct vq_desc_extra *dxp;
	struct virtio_hw *hw = vq->hw;
	struct vring_desc *start_dp;
	uint16_t needed = 1;
	uint16_t head_idx, idx;

	if (unlikely(vq->vq_free_cnt == 0))
		return -ENOSPC;
	if (unlikely(vq->vq_free_cnt < needed))
		return -EMSGSIZE;

	head_idx = vq->vq_desc_head_idx;
	if (unlikely(head_idx >= vq->vq_nentries))
		return -EFAULT;

	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;
	start_dp[idx].addr =
		VIRTIO_MBUF_ADDR(cookie, vq) +
		RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
	start_dp[idx].len =
		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
	start_dp[idx].flags = VRING_DESC_F_WRITE;
	idx = start_dp[idx].next;
	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);

	return 0;
}
/* When doing TSO, the IP length is not included in the pseudo header
 * checksum of the packet given to the PMD, but for virtio it is
 * expected.
 */
static void
virtio_tso_fix_cksum(struct rte_mbuf *m)
{
	/* common case: header is not fragmented */
	if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
			m->l4_len)) {
		struct ipv4_hdr *iph;
		struct ipv6_hdr *ip6h;
		struct tcp_hdr *th;
		uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
		uint32_t tmp;

		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
		th = RTE_PTR_ADD(iph, m->l3_len);
		if ((iph->version_ihl >> 4) == 4) {
			iph->hdr_checksum = 0;
			iph->hdr_checksum = rte_ipv4_cksum(iph);
			ip_len = iph->total_length;
			ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
				m->l3_len);
		} else {
			ip6h = (struct ipv6_hdr *)iph;
			ip_paylen = ip6h->payload_len;
		}

		/* calculate the new phdr checksum not including ip_paylen */
		prev_cksum = th->cksum;
		tmp = prev_cksum;
		tmp += ip_paylen;
		tmp = (tmp & 0xffff) + (tmp >> 16);
		new_cksum = tmp;

		/* replace it in the packet */
		th->cksum = new_cksum;
	}
}
static inline int
tx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
}
/* avoid write operation when possible, to lessen cache issues */
#define ASSIGN_UNLESS_EQUAL(var, val) do {	\
	if ((var) != (val))			\
		(var) = (val);			\
} while (0)
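
/*
 * Build the descriptor chain for one Tx packet. The virtio-net header is
 * either pushed into the mbuf headroom (can_push), placed in the reserved
 * header region and referenced through an indirect descriptor list
 * (use_indirect), or linked in as a separate leading descriptor.
 */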
static inline void
virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
		       uint16_t needed, int use_indirect, int can_push)
{
	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
	struct vq_desc_extra *dxp;
	struct virtqueue *vq = txvq->vq;
	struct vring_desc *start_dp;
	uint16_t seg_num = cookie->nb_segs;
	uint16_t head_idx, idx;
	uint16_t head_size = vq->hw->vtnet_hdr_size;
	struct virtio_net_hdr *hdr;
	int offload;

	offload = tx_offload_enabled(vq->hw);
	head_idx = vq->vq_desc_head_idx;
	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;
	if (can_push) {
		/* prepend cannot fail, checked by caller */
		hdr = (struct virtio_net_hdr *)
			rte_pktmbuf_prepend(cookie, head_size);
		/* if offload disabled, it is not zeroed below, do it now */
		if (offload == 0) {
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	} else if (use_indirect) {
		/* setup tx ring slot to point to indirect
		 * descriptor list stored in reserved region.
		 *
		 * the first slot in indirect ring is already preset
		 * to point to the header in reserved region
		 */
		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
		start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		/* loop below will fill in rest of the indirect elements */
		start_dp = txr[idx].tx_indir;
		idx = 1;
	} else {
		/* setup first tx ring slot to point to header
		 * stored in reserved region.
		 */
		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
		start_dp[idx].flags = VRING_DESC_F_NEXT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		idx = start_dp[idx].next;
	}
	/* Checksum Offload / TSO */
	if (offload) {
		if (cookie->ol_flags & PKT_TX_TCP_SEG)
			cookie->ol_flags |= PKT_TX_TCP_CKSUM;

		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
		case PKT_TX_UDP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct udp_hdr,
				dgram_cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		case PKT_TX_TCP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		default:
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			break;
		}

		/* TCP Segmentation Offload */
		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
			virtio_tso_fix_cksum(cookie);
			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
				VIRTIO_NET_HDR_GSO_TCPV6 :
				VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->gso_size = cookie->tso_segsz;
			hdr->hdr_len =
				cookie->l2_len +
				cookie->l3_len +
				cookie->l4_len;
		} else {
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	}
	do {
		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
		start_dp[idx].len   = cookie->data_len;
		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
		idx = start_dp[idx].next;
	} while ((cookie = cookie->next) != NULL);

	if (use_indirect)
		idx = vq->vq_ring.desc[head_idx].next;

	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);
}
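
/* Dump the state of the control virtqueue, if the device has one. */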
void
virtio_dev_cq_start(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	if (hw->cvq && hw->cvq->vq) {
		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
	}
}
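
/*
 * Rx queue setup: clamp the ring to nb_desc entries, attach the mempool and
 * pre-fill the virtqueue with receive buffers.
 */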
int
virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			__rte_unused const struct rte_eth_rxconf *rx_conf,
			struct rte_mempool *mp)
{
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_rx *rxvq;
	int error, nbufs;
	struct rte_mbuf *m;
	uint16_t desc_idx;

	PMD_INIT_FUNC_TRACE();

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	rxvq = &vq->rxq;
	rxvq->queue_id = queue_idx;
	rxvq->mpool = mp;
	if (rxvq->mpool == NULL) {
		rte_exit(EXIT_FAILURE,
			"Cannot allocate mbufs for rx virtqueue");
	}
	dev->data->rx_queues[queue_idx] = rxvq;
	/* Allocate blank mbufs for each rx descriptor */
	nbufs = 0;
	error = ENOSPC;

	if (hw->use_simple_rxtx) {
		for (desc_idx = 0; desc_idx < vq->vq_nentries;
		     desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
			vq->vq_ring.desc[desc_idx].flags =
				VRING_DESC_F_WRITE;
		}
	}

	memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
	     desc_idx++) {
		vq->sw_ring[vq->vq_nentries + desc_idx] =
			&rxvq->fake_mbuf;
	}

	while (!virtqueue_full(vq)) {
		m = rte_mbuf_raw_alloc(rxvq->mpool);
		if (m == NULL)
			break;

		/* Enqueue allocated buffers */
		if (hw->use_simple_rxtx)
			error = virtqueue_enqueue_recv_refill_simple(vq, m);
		else
			error = virtqueue_enqueue_recv_refill(vq, m);

		if (error) {
			rte_pktmbuf_free(m);
			break;
		}
		nbufs++;
	}

	vq_update_avail_idx(vq);

	PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);

	virtio_rxq_vec_setup(rxvq);

	VIRTQUEUE_DUMP(vq);

	return 0;
}
static void
virtio_update_rxtx_handler(struct rte_eth_dev *dev,
			   const struct rte_eth_txconf *tx_conf)
{
	uint8_t use_simple_rxtx = 0;
	struct virtio_hw *hw = dev->data->dev_private;

#if defined RTE_ARCH_X86
	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3))
		use_simple_rxtx = 1;
#elif defined RTE_ARCH_ARM64 || defined CONFIG_RTE_ARCH_ARM
	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
		use_simple_rxtx = 1;
#endif
	/* Use simple rx/tx func if single segment and no offloads */
	if (use_simple_rxtx &&
	    (tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
	    !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
		PMD_INIT_LOG(INFO, "Using simple rx/tx path");
		dev->tx_pkt_burst = virtio_xmit_pkts_simple;
		dev->rx_pkt_burst = virtio_recv_pkts_vec;
		hw->use_simple_rxtx = use_simple_rxtx;
	}
}
/*
 * struct rte_eth_dev *dev: Used to update dev
 * uint16_t nb_desc: Defaults to values read from config space
 * unsigned int socket_id: Used to allocate memzone
 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
 * uint16_t queue_idx: Just used as an index in dev txq list
 */
int
virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			const struct rte_eth_txconf *tx_conf)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_tx *txvq;
	uint16_t tx_free_thresh;
	uint16_t desc_idx;

	PMD_INIT_FUNC_TRACE();

	virtio_update_rxtx_handler(dev, tx_conf);

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	txvq = &vq->txq;
	txvq->queue_id = queue_idx;

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh =
			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
			"number of TX entries minus 3 (%u)."
			" (tx_free_thresh=%u port=%u queue=%u)\n",
			vq->vq_nentries - 3,
			tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	vq->vq_free_thresh = tx_free_thresh;

	if (hw->use_simple_rxtx) {
		uint16_t mid_idx = vq->vq_nentries >> 1;

		for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] =
				desc_idx + mid_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].next =
				desc_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].addr =
				txvq->virtio_net_hdr_mem +
				offsetof(struct virtio_tx_region, tx_hdr);
			vq->vq_ring.desc[desc_idx + mid_idx].len =
				vq->hw->vtnet_hdr_size;
			vq->vq_ring.desc[desc_idx + mid_idx].flags =
				VRING_DESC_F_NEXT;
			vq->vq_ring.desc[desc_idx].flags = 0;
		}
		for (desc_idx = mid_idx; desc_idx < vq->vq_nentries;
		     desc_idx++)
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
	}

	VIRTQUEUE_DUMP(vq);

	dev->data->tx_queues[queue_idx] = txvq;
	return 0;
}
static void
virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
{
	int error;

	/*
	 * Requeue the discarded mbuf. This should always be
	 * successful since it was just dequeued.
	 */
	error = virtqueue_enqueue_recv_refill(vq, m);
	if (unlikely(error)) {
		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
		rte_pktmbuf_free(m);
	}
}
static void
virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
{
	uint32_t s = mbuf->pkt_len;
	struct ether_addr *ea;

	if (s == 64) {
		stats->size_bins[1]++;
	} else if (s > 64 && s < 1024) {
		uint32_t bin;

		/* count zeros, and offset into correct bin */
		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
		stats->size_bins[bin]++;
	} else {
		if (s < 64)
			stats->size_bins[0]++;
		else if (s < 1519)
			stats->size_bins[6]++;
		else if (s >= 1519)
			stats->size_bins[7]++;
	}

	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
	if (is_multicast_ether_addr(ea)) {
		if (is_broadcast_ether_addr(ea))
			stats->broadcast++;
		else
			stats->multicast++;
	}
}
/* Optionally fill offload information in structure */
static int
virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
{
	struct rte_net_hdr_lens hdr_lens;
	uint32_t hdrlen, ptype;
	int l4_supported = 0;

	/* nothing to do */
	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
		return 0;

	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;

	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
	m->packet_type = ptype;
	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
		l4_supported = 1;

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
		if (hdr->csum_start <= hdrlen && l4_supported) {
			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
		} else {
			/* Unknown proto or tunnel, do sw cksum. We can assume
			 * the cksum field is in the first segment since the
			 * buffers we provided to the host are large enough.
			 * In case of SCTP, this will be wrong since it's a CRC
			 * but there's nothing we can do.
			 */
			uint16_t csum = 0, off;

			rte_raw_cksum_mbuf(m, hdr->csum_start,
				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
				&csum);
			if (likely(csum != 0xffff))
				csum = ~csum;
			off = hdr->csum_offset + hdr->csum_start;
			if (rte_pktmbuf_data_len(m) >= off + 1)
				*rte_pktmbuf_mtod_offset(m, uint16_t *,
					off) = csum;
		}
	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
		m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
	}
	/* GSO request, save required information in mbuf */
	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		/* Check unsupported modes */
		if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
		    (hdr->gso_size == 0)) {
			return -EINVAL;
		}

		/* Update mss lengths in mbuf */
		m->tso_segsz = hdr->gso_size;
		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
		case VIRTIO_NET_HDR_GSO_TCPV6:
			m->ol_flags |= PKT_RX_LRO |
				PKT_RX_L4_CKSUM_NONE;
			break;
		default:
			return -EINVAL;
		}
	}

	return 0;
}
static inline int
rx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
}
#define VIRTIO_MBUF_BURST_SZ 64
#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
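
/*
 * Rx burst function for the non-mergeable case: every received packet fits
 * in a single descriptor, so each dequeued mbuf becomes one packet.
 */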
uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	int error;
	uint32_t i, nb_enqueued;
	uint32_t hdr_size;
	int offload;
	struct virtio_net_hdr *hdr;

	nb_rx = 0;
	if (unlikely(hw->started == 0))
		return nb_rx;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

	num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
	num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
	if (likely(num > DESC_PER_CACHELINE))
		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);

	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);

	nb_enqueued = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);
= 0; i
< num
; i
++) {
762 PMD_RX_LOG(DEBUG
, "packet len:%d", len
[i
]);
764 if (unlikely(len
[i
] < hdr_size
+ ETHER_HDR_LEN
)) {
765 PMD_RX_LOG(ERR
, "Packet drop");
767 virtio_discard_rxbuf(vq
, rxm
);
768 rxvq
->stats
.errors
++;
772 rxm
->port
= rxvq
->port_id
;
773 rxm
->data_off
= RTE_PKTMBUF_HEADROOM
;
777 rxm
->pkt_len
= (uint32_t)(len
[i
] - hdr_size
);
778 rxm
->data_len
= (uint16_t)(len
[i
] - hdr_size
);
780 hdr
= (struct virtio_net_hdr
*)((char *)rxm
->buf_addr
+
781 RTE_PKTMBUF_HEADROOM
- hdr_size
);
786 if (offload
&& virtio_rx_offload(rxm
, hdr
) < 0) {
787 virtio_discard_rxbuf(vq
, rxm
);
788 rxvq
->stats
.errors
++;
792 VIRTIO_DUMP_PACKET(rxm
, rxm
->data_len
);
794 rx_pkts
[nb_rx
++] = rxm
;
796 rxvq
->stats
.bytes
+= rxm
->pkt_len
;
797 virtio_update_packet_stats(&rxvq
->stats
, rxm
);
800 rxvq
->stats
.packets
+= nb_rx
;
	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}
uint16_t
virtio_recv_mergeable_pkts(void *rx_queue,
			struct rte_mbuf **rx_pkts,
			uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *prev;
	int error;
	uint32_t i, nb_enqueued;
	uint32_t seg_num;
	uint16_t extra_idx;
	uint32_t seg_res;
	uint32_t hdr_size;
	int offload;

	nb_rx = 0;
	if (unlikely(hw->started == 0))
		return nb_rx;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

	PMD_RX_LOG(DEBUG, "used:%d", nb_used);

	i = 0;
	nb_enqueued = 0;
	seg_num = 0;
	extra_idx = 0;
	seg_res = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);
< nb_used
) {
872 struct virtio_net_hdr_mrg_rxbuf
*header
;
874 if (nb_rx
== nb_pkts
)
877 num
= virtqueue_dequeue_burst_rx(vq
, rcv_pkts
, len
, 1);
883 PMD_RX_LOG(DEBUG
, "dequeue:%d", num
);
884 PMD_RX_LOG(DEBUG
, "packet len:%d", len
[0]);
888 if (unlikely(len
[0] < hdr_size
+ ETHER_HDR_LEN
)) {
889 PMD_RX_LOG(ERR
, "Packet drop");
891 virtio_discard_rxbuf(vq
, rxm
);
892 rxvq
->stats
.errors
++;
896 header
= (struct virtio_net_hdr_mrg_rxbuf
*)((char *)rxm
->buf_addr
+
897 RTE_PKTMBUF_HEADROOM
- hdr_size
);
898 seg_num
= header
->num_buffers
;
903 rxm
->data_off
= RTE_PKTMBUF_HEADROOM
;
904 rxm
->nb_segs
= seg_num
;
907 rxm
->pkt_len
= (uint32_t)(len
[0] - hdr_size
);
908 rxm
->data_len
= (uint16_t)(len
[0] - hdr_size
);
910 rxm
->port
= rxvq
->port_id
;
911 rx_pkts
[nb_rx
] = rxm
;
914 if (offload
&& virtio_rx_offload(rxm
, &header
->hdr
) < 0) {
915 virtio_discard_rxbuf(vq
, rxm
);
916 rxvq
->stats
.errors
++;
920 seg_res
= seg_num
- 1;
922 while (seg_res
!= 0) {
924 * Get extra segments for current uncompleted packet.
927 RTE_MIN(seg_res
, RTE_DIM(rcv_pkts
));
928 if (likely(VIRTQUEUE_NUSED(vq
) >= rcv_cnt
)) {
930 virtqueue_dequeue_burst_rx(vq
,
931 rcv_pkts
, len
, rcv_cnt
);
936 "No enough segments for packet.");
938 virtio_discard_rxbuf(vq
, rxm
);
939 rxvq
->stats
.errors
++;
945 while (extra_idx
< rcv_cnt
) {
946 rxm
= rcv_pkts
[extra_idx
];
948 rxm
->data_off
= RTE_PKTMBUF_HEADROOM
- hdr_size
;
949 rxm
->pkt_len
= (uint32_t)(len
[extra_idx
]);
950 rxm
->data_len
= (uint16_t)(len
[extra_idx
]);
956 rx_pkts
[nb_rx
]->pkt_len
+= rxm
->pkt_len
;
963 rte_vlan_strip(rx_pkts
[nb_rx
]);
965 VIRTIO_DUMP_PACKET(rx_pkts
[nb_rx
],
966 rx_pkts
[nb_rx
]->data_len
);
968 rxvq
->stats
.bytes
+= rx_pkts
[nb_rx
]->pkt_len
;
969 virtio_update_packet_stats(&rxvq
->stats
, rx_pkts
[nb_rx
]);
973 rxvq
->stats
.packets
+= nb_rx
;
	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}
uint16_t
virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct virtnet_tx *txvq = tx_queue;
	struct virtqueue *vq = txvq->vq;
	struct virtio_hw *hw = vq->hw;
	uint16_t hdr_size = hw->vtnet_hdr_size;
	uint16_t nb_used, nb_tx = 0;
	int error;

	if (unlikely(hw->started == 0))
		return nb_tx;

	if (unlikely(nb_pkts < 1))
		return nb_pkts;

	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();
	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
		virtio_xmit_cleanup(vq, nb_used);
	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		int can_push = 0, use_indirect = 0, slots, need;

		/* Do VLAN tag insertion */
		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
			error = rte_vlan_insert(&txm);
			if (unlikely(error)) {
				rte_pktmbuf_free(txm);
				continue;
			}
		}

		/* optimize ring usage */
		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
		     vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
		    rte_mbuf_refcnt_read(txm) == 1 &&
		    RTE_MBUF_DIRECT(txm) &&
		    txm->nb_segs == 1 &&
		    rte_pktmbuf_headroom(txm) >= hdr_size &&
		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
				   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
			can_push = 1;
		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
			use_indirect = 1;

		/* How many main ring entries are needed for this Tx?
		 * any_layout => number of segments
		 * indirect   => 1
		 * default    => number of segments + 1
		 */
		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
		need = slots - vq->vq_free_cnt;

		/* Positive value indicates it needs free vring descriptors */
		if (unlikely(need > 0)) {
			nb_used = VIRTQUEUE_NUSED(vq);
			virtio_rmb();
			need = RTE_MIN(need, (int)nb_used);

			virtio_xmit_cleanup(vq, need);
			need = slots - vq->vq_free_cnt;
			if (unlikely(need > 0)) {
				PMD_TX_LOG(ERR,
					   "No free tx descriptors to transmit");
				break;
			}
		}
		/* Enqueue Packet buffers */
		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);

		txvq->stats.bytes += txm->pkt_len;
		virtio_update_packet_stats(&txvq->stats, txm);
	}

	txvq->stats.packets += nb_tx;

	if (likely(nb_tx)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
		}
	}

	return nb_tx;
}