/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016 Cavium, Inc
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_byteorder.h>
#include <rte_branch_prediction.h>
#include <rte_cycles.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_errno.h>
#include <rte_memory.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_vect.h>

#include "virtio_rxtx_simple.h"
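
/* Number of used ring entries handled per iteration of the vector Rx loop;
 * this is also the smallest burst the routine will accept.
 */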
#define RTE_VIRTIO_DESC_PER_LOOP 8

/* virtio vPMD receive routine, only accepts nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP
 *
 * This routine is for non-mergeable RX, one desc for each guest buffer.
 * This routine is based on the RX ring layout optimization. Each entry in the
 * avail ring points to the desc with the same index in the desc ring and this
 * will never be changed in the driver.
 *
 * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
 */
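/*
 * A consequence of that fixed avail->desc mapping is that the used element at
 * slot i always describes the mbuf stored in vq->sw_ring[i], so mbuf pointers
 * can be copied to rx_pkts[] positionally, without a per-descriptor lookup.
 */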
uint16_t
virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
	uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	uint16_t nb_used;
	uint16_t desc_idx;
	struct vring_used_elem *rused;
	struct rte_mbuf **sw_ring;
	struct rte_mbuf **sw_ring_end;
	uint16_t nb_pkts_received = 0;

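	/*
	 * Shuffle masks for vqtbl1q_u8. Each vring_used_elem is 8 bytes
	 * {u32 id, u32 len}, so one 16-byte NEON load covers two elements:
	 *   bytes  0-3   id  of element 0
	 *   bytes  4-7   len of element 0
	 *   bytes  8-11  id  of element 1
	 *   bytes 12-15  len of element 1
	 * shuf_msk1 picks the low 16 bits of element 0's len (bytes 4-5),
	 * shuf_msk2 those of element 1 (bytes 12-13), and drops them into the
	 * pkt_len and data_len slots of rx_descriptor_fields1; lanes set to
	 * 0xFF come out as zero.
	 */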
	uint8x16_t shuf_msk1 = {
		0xFF, 0xFF, 0xFF, 0xFF, /* packet type */
		4, 5, 0xFF, 0xFF,       /* pkt len */
		4, 5,                   /* dat len */
		0xFF, 0xFF,             /* vlan tci */
		0xFF, 0xFF, 0xFF, 0xFF
	};

	uint8x16_t shuf_msk2 = {
		0xFF, 0xFF, 0xFF, 0xFF, /* packet type */
		12, 13, 0xFF, 0xFF,     /* pkt len */
		12, 13,                 /* dat len */
		0xFF, 0xFF,             /* vlan tci */
		0xFF, 0xFF, 0xFF, 0xFF
	};

	/* Subtract the header length.
	 * In which case do we need the header length in used->len?
	 */
	uint16x8_t len_adjust = {
		0, 0,
		(uint16_t)vq->hw->vtnet_hdr_size, 0,
		(uint16_t)vq->hw->vtnet_hdr_size,
		0,
		0, 0
	};
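
	/*
	 * In the 16-bit lane view of rx_descriptor_fields1 this subtracts
	 * vtnet_hdr_size from lane 2 (low half of pkt_len) and lane 4
	 * (data_len) only; every other lane is left untouched.
	 */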

	if (unlikely(hw->started == 0))
		return nb_pkts_received;

	if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
		return 0;

	nb_used = VIRTQUEUE_NUSED(vq);

	rte_rmb();

	if (unlikely(nb_used == 0))
		return 0;

	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
	nb_used = RTE_MIN(nb_used, nb_pkts);

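	/*
	 * vq_used_cons_idx is a free-running counter; mask it with the ring
	 * size (a power of two) to get the current slot. sw_ring_end marks
	 * the physical end of the ring so the loop below can stop at the
	 * wrap point instead of wrapping mid-burst.
	 */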
	desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
	rused = &vq->vq_split.ring.used->ring[desc_idx];
	sw_ring = &vq->sw_ring[desc_idx];
	sw_ring_end = &vq->sw_ring[vq->vq_nentries];

	rte_prefetch_non_temporal(rused);

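	/*
	 * Opportunistically refill the ring: once at least
	 * RTE_VIRTIO_VPMD_RX_REARM_THRESH descriptors are free, hand a fresh
	 * batch of mbufs back to the device and kick it if notifications are
	 * not suppressed.
	 */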
	if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
		virtio_rxq_rearm_vec(rxvq);
		if (unlikely(virtqueue_kick_prepare(vq)))
			virtqueue_notify(vq);
	}

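	/*
	 * Main loop: each iteration consumes RTE_VIRTIO_DESC_PER_LOOP (8)
	 * used elements, processed as four pairs. Per pair: copy two mbuf
	 * pointers from sw_ring to rx_pkts, load the two used elements,
	 * shuffle their lengths into rx_descriptor_fields1 and strip the
	 * vnet header length.
	 */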
	for (nb_pkts_received = 0;
		nb_pkts_received < nb_used;) {
		uint64x2_t desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
		uint64x2_t mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
		uint64x2_t pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];

		mbp[0] = vld1q_u64((uint64_t *)(sw_ring + 0));
		desc[0] = vld1q_u64((uint64_t *)(rused + 0));
		vst1q_u64((uint64_t *)&rx_pkts[0], mbp[0]);

		mbp[1] = vld1q_u64((uint64_t *)(sw_ring + 2));
		desc[1] = vld1q_u64((uint64_t *)(rused + 2));
		vst1q_u64((uint64_t *)&rx_pkts[2], mbp[1]);

		mbp[2] = vld1q_u64((uint64_t *)(sw_ring + 4));
		desc[2] = vld1q_u64((uint64_t *)(rused + 4));
		vst1q_u64((uint64_t *)&rx_pkts[4], mbp[2]);

		mbp[3] = vld1q_u64((uint64_t *)(sw_ring + 6));
		desc[3] = vld1q_u64((uint64_t *)(rused + 6));
		vst1q_u64((uint64_t *)&rx_pkts[6], mbp[3]);

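		/*
		 * For each pair of used elements, scatter the 16-bit lengths
		 * into the pkt_len/data_len fields of the two mbufs
		 * (vqtbl1q_u8 + shuffle mask), subtract the vnet header
		 * (vsubq_u16 + len_adjust) and write the whole
		 * rx_descriptor_fields1 block with a single 128-bit store.
		 */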
		pkt_mb[1] = vreinterpretq_u64_u8(vqtbl1q_u8(
				vreinterpretq_u8_u64(desc[0]), shuf_msk2));
		pkt_mb[0] = vreinterpretq_u64_u8(vqtbl1q_u8(
				vreinterpretq_u8_u64(desc[0]), shuf_msk1));
		pkt_mb[1] = vreinterpretq_u64_u16(vsubq_u16(
				vreinterpretq_u16_u64(pkt_mb[1]), len_adjust));
		pkt_mb[0] = vreinterpretq_u64_u16(vsubq_u16(
				vreinterpretq_u16_u64(pkt_mb[0]), len_adjust));
		vst1q_u64((void *)&rx_pkts[1]->rx_descriptor_fields1,
			pkt_mb[1]);
		vst1q_u64((void *)&rx_pkts[0]->rx_descriptor_fields1,
			pkt_mb[0]);

		pkt_mb[3] = vreinterpretq_u64_u8(vqtbl1q_u8(
				vreinterpretq_u8_u64(desc[1]), shuf_msk2));
		pkt_mb[2] = vreinterpretq_u64_u8(vqtbl1q_u8(
				vreinterpretq_u8_u64(desc[1]), shuf_msk1));
		pkt_mb[3] = vreinterpretq_u64_u16(vsubq_u16(
				vreinterpretq_u16_u64(pkt_mb[3]), len_adjust));
		pkt_mb[2] = vreinterpretq_u64_u16(vsubq_u16(
				vreinterpretq_u16_u64(pkt_mb[2]), len_adjust));
		vst1q_u64((void *)&rx_pkts[3]->rx_descriptor_fields1,
			pkt_mb[3]);
		vst1q_u64((void *)&rx_pkts[2]->rx_descriptor_fields1,
			pkt_mb[2]);

		pkt_mb[5] = vreinterpretq_u64_u8(vqtbl1q_u8(
				vreinterpretq_u8_u64(desc[2]), shuf_msk2));
		pkt_mb[4] = vreinterpretq_u64_u8(vqtbl1q_u8(
				vreinterpretq_u8_u64(desc[2]), shuf_msk1));
		pkt_mb[5] = vreinterpretq_u64_u16(vsubq_u16(
				vreinterpretq_u16_u64(pkt_mb[5]), len_adjust));
		pkt_mb[4] = vreinterpretq_u64_u16(vsubq_u16(
				vreinterpretq_u16_u64(pkt_mb[4]), len_adjust));
		vst1q_u64((void *)&rx_pkts[5]->rx_descriptor_fields1,
			pkt_mb[5]);
		vst1q_u64((void *)&rx_pkts[4]->rx_descriptor_fields1,
			pkt_mb[4]);

		pkt_mb[7] = vreinterpretq_u64_u8(vqtbl1q_u8(
				vreinterpretq_u8_u64(desc[3]), shuf_msk2));
		pkt_mb[6] = vreinterpretq_u64_u8(vqtbl1q_u8(
				vreinterpretq_u8_u64(desc[3]), shuf_msk1));
		pkt_mb[7] = vreinterpretq_u64_u16(vsubq_u16(
				vreinterpretq_u16_u64(pkt_mb[7]), len_adjust));
		pkt_mb[6] = vreinterpretq_u64_u16(vsubq_u16(
				vreinterpretq_u16_u64(pkt_mb[6]), len_adjust));
		vst1q_u64((void *)&rx_pkts[7]->rx_descriptor_fields1,
			pkt_mb[7]);
		vst1q_u64((void *)&rx_pkts[6]->rx_descriptor_fields1,
			pkt_mb[6]);

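		/*
		 * Account for what this iteration actually delivered: sw_ring
		 * and rused are never wrapped inside a burst, so if the end of
		 * the ring was reached only the entries up to sw_ring_end are
		 * counted and the loop stops; the remainder is left for the
		 * next call.
		 */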
		if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
			if (sw_ring + nb_used <= sw_ring_end)
				nb_pkts_received += nb_used;
			else
				nb_pkts_received += sw_ring_end - sw_ring;
			break;
		} else {
			if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
				sw_ring_end)) {
				nb_pkts_received += sw_ring_end - sw_ring;
				break;
			} else {
				nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;

				rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
				sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
				rused += RTE_VIRTIO_DESC_PER_LOOP;
				nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
			}
		}
	}

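	/*
	 * Publish the progress: advance the used consumer index, mark the
	 * consumed descriptors as free for the next rearm, and update the Rx
	 * counters.
	 */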
	vq->vq_used_cons_idx += nb_pkts_received;
	vq->vq_free_cnt += nb_pkts_received;
	rxvq->stats.packets += nb_pkts_received;
	return nb_pkts_received;
}