/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>
#include <rte_cpuflags.h>
#include <rte_net.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio_pci.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"

#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
#else
#define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
#endif

#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
	ETH_TXQ_FLAGS_NOOFFLOADS)
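
/*
 * Return the descriptor chain starting at desc_idx to the virtqueue free
 * list: grow vq_free_cnt by the number of descriptors the chain used and
 * link the newly freed chain behind the existing free chain, if any.
 */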
static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp, *dp_tail;
	struct vq_desc_extra *dxp;
	uint16_t desc_idx_last = desc_idx;

	dp = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
		while (dp->flags & VRING_DESC_F_NEXT) {
			desc_idx_last = dp->next;
			dp = &vq->vq_ring.desc[dp->next];
		}
	}
	dxp->ndescs = 0;

	/*
	 * We must append the existing free chain, if any, to the end of
	 * newly freed chain. If the virtqueue was completely used, then
	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
	 */
	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
		vq->vq_desc_head_idx = desc_idx;
	} else {
		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
		dp_tail->next = desc_idx;
	}

	vq->vq_desc_tail_idx = desc_idx_last;
	dp->next = VQ_RING_DESC_CHAIN_END;
}
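
/*
 * Dequeue up to 'num' used RX descriptors: record each buffer's length in
 * len[], hand the corresponding mbuf back through rx_pkts[], and return
 * the freed descriptors to the free chain. Returns the number dequeued.
 */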
static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
			   uint32_t *len, uint16_t num)
{
	struct vring_used_elem *uep;
	struct rte_mbuf *cookie;
	uint16_t used_idx, desc_idx;
	uint16_t i;

	/* Caller does the check */
	for (i = 0; i < num; i++) {
		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];
		desc_idx = (uint16_t) uep->id;
		len[i] = uep->len;
		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

		if (unlikely(cookie == NULL)) {
			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
				vq->vq_used_cons_idx);
			break;
		}

		rte_prefetch0(cookie);
		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
		rx_pkts[i] = cookie;
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);
		vq->vq_descx[desc_idx].cookie = NULL;
	}

	return i;
}

#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif

/* Cleanup from completed transmits. */
static void
virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
{
	uint16_t i, used_idx, desc_idx;

	for (i = 0; i < num; i++) {
		struct vring_used_elem *uep;
		struct vq_desc_extra *dxp;

		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];

		desc_idx = (uint16_t) uep->id;
		dxp = &vq->vq_descx[desc_idx];
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);

		if (dxp->cookie != NULL) {
			rte_pktmbuf_free(dxp->cookie);
			dxp->cookie = NULL;
		}
	}
}
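
/*
 * Post a single mbuf as a receive buffer: take one descriptor from the
 * free chain, point it at the mbuf data area (leaving room for the
 * virtio-net header in the headroom) and expose it on the avail ring.
 */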
static inline int
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
{
	struct vq_desc_extra *dxp;
	struct virtio_hw *hw = vq->hw;
	struct vring_desc *start_dp;
	uint16_t needed = 1;
	uint16_t head_idx, idx;

	if (unlikely(vq->vq_free_cnt == 0))
		return -ENOSPC;
	if (unlikely(vq->vq_free_cnt < needed))
		return -EMSGSIZE;

	head_idx = vq->vq_desc_head_idx;
	if (unlikely(head_idx >= vq->vq_nentries))
		return -EFAULT;

	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;
	start_dp[idx].addr =
		VIRTIO_MBUF_ADDR(cookie, vq) +
		RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
	start_dp[idx].len =
		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
	start_dp[idx].flags = VRING_DESC_F_WRITE;
	idx = start_dp[idx].next;
	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);

	return 0;
}

/* When doing TSO, the IP length is not included in the pseudo header
 * checksum of the packet given to the PMD, but for virtio it is
 * expected.
 */
static void
virtio_tso_fix_cksum(struct rte_mbuf *m)
{
	/* common case: header is not fragmented */
	if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
			m->l4_len)) {
		struct ipv4_hdr *iph;
		struct ipv6_hdr *ip6h;
		struct tcp_hdr *th;
		uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
		uint32_t tmp;

		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
		th = RTE_PTR_ADD(iph, m->l3_len);
		if ((iph->version_ihl >> 4) == 4) {
			iph->hdr_checksum = 0;
			iph->hdr_checksum = rte_ipv4_cksum(iph);
			ip_len = iph->total_length;
			ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
				m->l3_len);
		} else {
			ip6h = (struct ipv6_hdr *)iph;
			ip_paylen = ip6h->payload_len;
		}

		/* calculate the new phdr checksum not including ip_paylen */
		prev_cksum = th->cksum;
		tmp = prev_cksum;
		tmp += ip_paylen;
		tmp = (tmp & 0xffff) + (tmp >> 16);
		new_cksum = tmp;

		/* replace it in the packet */
		th->cksum = new_cksum;
	}
}

static inline int
tx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
}
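
/*
 * Build the descriptor chain for one outgoing mbuf chain. Depending on the
 * negotiated features the virtio-net header is either pushed into the mbuf
 * headroom (can_push), placed in the reserved header region and referenced
 * through an indirect descriptor list (use_indirect), or chained as the
 * first regular descriptor.
 */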
static inline void
virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
		       uint16_t needed, int use_indirect, int can_push)
{
	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
	struct vq_desc_extra *dxp;
	struct virtqueue *vq = txvq->vq;
	struct vring_desc *start_dp;
	uint16_t seg_num = cookie->nb_segs;
	uint16_t head_idx, idx;
	uint16_t head_size = vq->hw->vtnet_hdr_size;
	struct virtio_net_hdr *hdr;
	int offload;

	offload = tx_offload_enabled(vq->hw);
	head_idx = vq->vq_desc_head_idx;
	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;

	if (can_push) {
		/* prepend cannot fail, checked by caller */
		hdr = (struct virtio_net_hdr *)
			rte_pktmbuf_prepend(cookie, head_size);
		/* if offload disabled, it is not zeroed below, do it now */
		if (offload == 0)
			memset(hdr, 0, head_size);
	} else if (use_indirect) {
		/* setup tx ring slot to point to indirect
		 * descriptor list stored in reserved region.
		 *
		 * the first slot in indirect ring is already preset
		 * to point to the header in reserved region
		 */
		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
		start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		/* loop below will fill in rest of the indirect elements */
		start_dp = txr[idx].tx_indir;
		idx = 1;
	} else {
		/* setup first tx ring slot to point to header
		 * stored in reserved region.
		 */
		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
		start_dp[idx].flags = VRING_DESC_F_NEXT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		idx = start_dp[idx].next;
	}

	/* Checksum Offload / TSO */
	if (offload) {
		if (cookie->ol_flags & PKT_TX_TCP_SEG)
			cookie->ol_flags |= PKT_TX_TCP_CKSUM;

		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
		case PKT_TX_UDP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct udp_hdr,
				dgram_cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		case PKT_TX_TCP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		default:
			hdr->csum_start = 0;
			hdr->csum_offset = 0;
			hdr->flags = 0;
			break;
		}

		/* TCP Segmentation Offload */
		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
			virtio_tso_fix_cksum(cookie);
			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
				VIRTIO_NET_HDR_GSO_TCPV6 :
				VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->gso_size = cookie->tso_segsz;
			hdr->hdr_len =
				cookie->l2_len +
				cookie->l3_len +
				cookie->l4_len;
		} else {
			hdr->gso_type = 0;
			hdr->gso_size = 0;
			hdr->hdr_len = 0;
		}
	}

	do {
		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
		start_dp[idx].len   = cookie->data_len;
		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
		idx = start_dp[idx].next;
	} while ((cookie = cookie->next) != NULL);

	if (use_indirect)
		idx = vq->vq_ring.desc[head_idx].next;

	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);
}
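
/* Dump the control virtqueue state, if one has been set up (debug aid). */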
void
virtio_dev_cq_start(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	if (hw->cvq && hw->cvq->vq) {
		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
	}
}
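
/*
 * Set up an RX queue: size the ring, hook the queue into dev->data, and
 * pre-fill it with mbufs from the given mempool (using the simple refill
 * routine when the vectorized path is enabled).
 */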
int
virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			__rte_unused const struct rte_eth_rxconf *rx_conf,
			struct rte_mempool *mp)
{
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_rx *rxvq;
	int error, nbufs;
	struct rte_mbuf *m;
	uint16_t desc_idx;

	PMD_INIT_FUNC_TRACE();

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	rxvq = &vq->rxq;
	rxvq->queue_id = queue_idx;
	rxvq->mpool = mp;
	if (rxvq->mpool == NULL) {
		rte_exit(EXIT_FAILURE,
			"Cannot allocate mbufs for rx virtqueue");
	}
	dev->data->rx_queues[queue_idx] = rxvq;

	/* Allocate blank mbufs for the each rx descriptor */
	nbufs = 0;

	if (hw->use_simple_rxtx) {
		for (desc_idx = 0; desc_idx < vq->vq_nentries;
		     desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
			vq->vq_ring.desc[desc_idx].flags =
				VRING_DESC_F_WRITE;
		}
	}

	memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
	     desc_idx++) {
		vq->sw_ring[vq->vq_nentries + desc_idx] =
			&rxvq->fake_mbuf;
	}

	while (!virtqueue_full(vq)) {
		m = rte_mbuf_raw_alloc(rxvq->mpool);
		if (m == NULL)
			break;

		/* Enqueue allocated buffers */
		if (hw->use_simple_rxtx)
			error = virtqueue_enqueue_recv_refill_simple(vq, m);
		else
			error = virtqueue_enqueue_recv_refill(vq, m);

		if (error) {
			rte_pktmbuf_free(m);
			break;
		}
		nbufs++;
	}

	vq_update_avail_idx(vq);

	PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);

	virtio_rxq_vec_setup(rxvq);

	VIRTQUEUE_DUMP(vq);

	return 0;
}
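
/*
 * Pick the simple (vectorized) rx/tx handlers when the CPU supports
 * SSE3/NEON, the TX queue is configured for single-segment packets with
 * no offloads, and mergeable RX buffers were not negotiated.
 */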
static void
virtio_update_rxtx_handler(struct rte_eth_dev *dev,
			   const struct rte_eth_txconf *tx_conf)
{
	uint8_t use_simple_rxtx = 0;
	struct virtio_hw *hw = dev->data->dev_private;

#if defined RTE_ARCH_X86
	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3))
		use_simple_rxtx = 1;
#elif defined RTE_ARCH_ARM64 || defined CONFIG_RTE_ARCH_ARM
	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
		use_simple_rxtx = 1;
#endif
	/* Use simple rx/tx func if single segment and no offloads */
	if (use_simple_rxtx &&
	    (tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
	    !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
		PMD_INIT_LOG(INFO, "Using simple rx/tx path");
		dev->tx_pkt_burst = virtio_xmit_pkts_simple;
		dev->rx_pkt_burst = virtio_recv_pkts_vec;
		hw->use_simple_rxtx = use_simple_rxtx;
	}
}

/*
 * struct rte_eth_dev *dev: Used to update dev
 * uint16_t nb_desc: Defaults to values read from config space
 * unsigned int socket_id: Used to allocate memzone
 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
 * uint16_t queue_idx: Just used as an index in dev txq list
 */
int
virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			const struct rte_eth_txconf *tx_conf)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_tx *txvq;
	uint16_t tx_free_thresh;
	uint16_t desc_idx;

	PMD_INIT_FUNC_TRACE();

	virtio_update_rxtx_handler(dev, tx_conf);

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	txvq = &vq->txq;
	txvq->queue_id = queue_idx;

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh =
			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
			"number of TX entries minus 3 (%u)."
			" (tx_free_thresh=%u port=%u queue=%u)\n",
			vq->vq_nentries - 3,
			tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	vq->vq_free_thresh = tx_free_thresh;
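
	/*
	 * For the simple TX path the ring is initialized up front: each
	 * descriptor in the second half points at the pre-built virtio-net
	 * header in the reserved region and chains back to its counterpart
	 * in the first half, which will carry the packet data.
	 */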
	if (hw->use_simple_rxtx) {
		uint16_t mid_idx = vq->vq_nentries >> 1;

		for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] =
				desc_idx + mid_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].next =
				desc_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].addr =
				txvq->virtio_net_hdr_mem +
				offsetof(struct virtio_tx_region, tx_hdr);
			vq->vq_ring.desc[desc_idx + mid_idx].len =
				vq->hw->vtnet_hdr_size;
			vq->vq_ring.desc[desc_idx + mid_idx].flags =
				VRING_DESC_F_NEXT;
			vq->vq_ring.desc[desc_idx].flags = 0;
		}
		for (desc_idx = mid_idx; desc_idx < vq->vq_nentries;
		     desc_idx++)
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
	}

	VIRTQUEUE_DUMP(vq);

	dev->data->tx_queues[queue_idx] = txvq;
	return 0;
}

static void
virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
{
	int error;

	/*
	 * Requeue the discarded mbuf. This should always be
	 * successful since it was just dequeued.
	 */
	error = virtqueue_enqueue_recv_refill(vq, m);
	if (unlikely(error)) {
		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
	}
}

static void
virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
{
	uint32_t s = mbuf->pkt_len;
	struct ether_addr *ea;

	if (s == 64) {
		stats->size_bins[1]++;
	} else if (s > 64 && s < 1024) {
		uint32_t bin;

		/* count zeros, and offset into correct bin */
		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
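		/* e.g. s = 70 lands in bin 2 (65-127 bytes) and s = 300 in
		 * bin 4 (256-511 bytes).
		 */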
		stats->size_bins[bin]++;
	} else {
		if (s < 64)
			stats->size_bins[0]++;
		else if (s < 1519)
			stats->size_bins[6]++;
		else if (s >= 1519)
			stats->size_bins[7]++;
	}

	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
	if (is_multicast_ether_addr(ea)) {
		if (is_broadcast_ether_addr(ea))
			stats->broadcast++;
		else
			stats->multicast++;
	}
}

/* Optionally fill offload information in structure */
static int
virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
{
	struct rte_net_hdr_lens hdr_lens;
	uint32_t hdrlen, ptype;
	int l4_supported = 0;

	/* nothing to do */
	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
		return 0;

	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;

	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
	m->packet_type = ptype;
	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
		l4_supported = 1;

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
		if (hdr->csum_start <= hdrlen && l4_supported) {
			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
		} else {
			/* Unknown proto or tunnel, do sw cksum. We can assume
			 * the cksum field is in the first segment since the
			 * buffers we provided to the host are large enough.
			 * In case of SCTP, this will be wrong since it's a CRC
			 * but there's nothing we can do.
			 */
			uint16_t csum = 0, off;

			rte_raw_cksum_mbuf(m, hdr->csum_start,
				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
				&csum);
			if (likely(csum != 0xffff))
				csum = ~csum;
			off = hdr->csum_offset + hdr->csum_start;
			if (rte_pktmbuf_data_len(m) >= off + 1)
				*rte_pktmbuf_mtod_offset(m, uint16_t *,
					off) = csum;
		}
	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
		m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
	}

	/* GSO request, save required information in mbuf */
	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		/* Check unsupported modes */
		if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
		    (hdr->gso_size == 0)) {
			return -EINVAL;
		}

		/* Update mss lengthes in mbuf */
		m->tso_segsz = hdr->gso_size;
		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
		case VIRTIO_NET_HDR_GSO_TCPV6:
			m->ol_flags |= PKT_RX_LRO | \
				PKT_RX_L4_CKSUM_NONE;
			break;
		default:
			return -EINVAL;
		}
	}

	return 0;
}

static inline int
rx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
}

#define VIRTIO_MBUF_BURST_SZ 64
#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
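
/*
 * Standard (non-mergeable) receive: dequeue up to nb_pkts used buffers,
 * strip the virtio-net header, apply RX offload flags and refill the ring
 * with fresh mbufs. Bursts are trimmed so the used index stays aligned to
 * DESC_PER_CACHELINE.
 */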
uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	int error;
	uint32_t i, nb_enqueued;
	uint32_t hdr_size;
	int offload;
	struct virtio_net_hdr *hdr;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

	num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
	num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
	if (likely(num > DESC_PER_CACHELINE))
		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);

	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);

	hw = vq->hw;
	nb_rx = 0;
	nb_enqueued = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);

	for (i = 0; i < num; i++) {
		rxm = rcv_pkts[i];

		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);

		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		rxm->port = rxvq->port_id;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;

		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
		rxm->data_len = (uint16_t)(len[i] - hdr_size);

		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);

		if (hw->vlan_strip)
			rte_vlan_strip(rxm);

		if (offload && virtio_rx_offload(rxm, hdr) < 0) {
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		VIRTIO_DUMP_PACKET(rxm, rxm->data_len);

		rx_pkts[nb_rx++] = rxm;

		rxvq->stats.bytes += rx_pkts[nb_rx - 1]->pkt_len;
		virtio_update_packet_stats(&rxvq->stats, rxm);
	}

	rxvq->stats.packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}
uint16_t
virtio_recv_mergeable_pkts(void *rx_queue,
			struct rte_mbuf **rx_pkts,
			uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *prev;
	int error;
	uint32_t i, nb_enqueued;
	uint32_t seg_num;
	uint16_t extra_idx;
	uint32_t seg_res;
	uint32_t hdr_size;
	int offload;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

	PMD_RX_LOG(DEBUG, "used:%d", nb_used);

	hw = vq->hw;
	nb_rx = 0;
	i = 0;
	nb_enqueued = 0;
	seg_num = 0;
	extra_idx = 0;
	seg_res = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);

	while (i < nb_used) {
		struct virtio_net_hdr_mrg_rxbuf *header;

		if (nb_rx == nb_pkts)
			break;

		num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
		if (num != 1)
			continue;

		i++;

		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
		PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);

		rxm = rcv_pkts[0];

		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);
		seg_num = header->num_buffers;

		if (seg_num == 0)
			seg_num = 1;

		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->nb_segs = seg_num;
		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
		rxm->data_len = (uint16_t)(len[0] - hdr_size);

		rxm->port = rxvq->port_id;
		rx_pkts[nb_rx] = rxm;
		prev = rxm;

		if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		seg_res = seg_num - 1;

		while (seg_res != 0) {
			/*
			 * Get extra segments for current uncompleted packet.
			 */
			uint16_t rcv_cnt =
				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
			if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
				uint32_t rx_num =
					virtqueue_dequeue_burst_rx(vq,
					rcv_pkts, len, rcv_cnt);
				i += rx_num;
				rcv_cnt = rx_num;
			} else {
				PMD_RX_LOG(ERR,
					   "No enough segments for packet.");
				nb_enqueued++;
				virtio_discard_rxbuf(vq, rxm);
				rxvq->stats.errors++;
				break;
			}

			extra_idx = 0;

			while (extra_idx < rcv_cnt) {
				rxm = rcv_pkts[extra_idx];

				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
				rxm->pkt_len = (uint32_t)(len[extra_idx]);
				rxm->data_len = (uint16_t)(len[extra_idx]);

				if (prev)
					prev->next = rxm;

				prev = rxm;
				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
				extra_idx++;
			}
			seg_res -= rcv_cnt;
		}

		if (hw->vlan_strip)
			rte_vlan_strip(rx_pkts[nb_rx]);

		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
			rx_pkts[nb_rx]->data_len);

		rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
		virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
		nb_rx++;
	}

	rxvq->stats.packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}
uint16_t
virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct virtnet_tx *txvq = tx_queue;
	struct virtqueue *vq = txvq->vq;
	struct virtio_hw *hw = vq->hw;
	uint16_t hdr_size = hw->vtnet_hdr_size;
	uint16_t nb_used, nb_tx;
	int error;

	if (unlikely(nb_pkts < 1))
		return nb_pkts;

	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();
	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
		virtio_xmit_cleanup(vq, nb_used);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		int can_push = 0, use_indirect = 0, slots, need;

		/* Do VLAN tag insertion */
		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
			error = rte_vlan_insert(&txm);
			if (unlikely(error)) {
				rte_pktmbuf_free(txm);
				continue;
			}
		}

		/* optimize ring usage */
		if (vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) &&
		    rte_mbuf_refcnt_read(txm) == 1 &&
		    RTE_MBUF_DIRECT(txm) &&
		    txm->nb_segs == 1 &&
		    rte_pktmbuf_headroom(txm) >= hdr_size &&
		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
				   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
			can_push = 1;
		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
			use_indirect = 1;

		/* How many main ring entries are needed to this Tx?
		 * any_layout => number of segments
		 * indirect   => 1
		 * default    => number of segments + 1
		 */
		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
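		/* e.g. a 3-segment mbuf needs 1 slot with indirect
		 * descriptors, 3 slots when the header is pushed into the
		 * first segment (can_push), and 4 slots otherwise.
		 */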
		need = slots - vq->vq_free_cnt;

		/* Positive value indicates it need free vring descriptors */
		if (unlikely(need > 0)) {
			nb_used = VIRTQUEUE_NUSED(vq);
			virtio_rmb();
			need = RTE_MIN(need, (int)nb_used);

			virtio_xmit_cleanup(vq, need);
			need = slots - vq->vq_free_cnt;
			if (unlikely(need > 0)) {
				PMD_TX_LOG(ERR,
					   "No free tx descriptors to transmit");
				break;
			}
		}

		/* Enqueue Packet buffers */
		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);

		txvq->stats.bytes += txm->pkt_len;
		virtio_update_packet_stats(&txvq->stats, txm);
	}

	txvq->stats.packets += nb_tx;

	if (likely(nb_tx)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
		}
	}

	return nb_tx;
}