]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * BSD LICENSE | |
3 | * | |
4 | * Copyright (C) Cavium networks Ltd. 2016 | |
5 | * | |
6 | * Redistribution and use in source and binary forms, with or without | |
7 | * modification, are permitted provided that the following conditions | |
8 | * are met: | |
9 | * | |
10 | * * Redistributions of source code must retain the above copyright | |
11 | * notice, this list of conditions and the following disclaimer. | |
12 | * * Redistributions in binary form must reproduce the above copyright | |
13 | * notice, this list of conditions and the following disclaimer in | |
14 | * the documentation and/or other materials provided with the | |
15 | * distribution. | |
16 | * * Neither the name of Cavium networks nor the names of its | |
17 | * contributors may be used to endorse or promote products derived | |
18 | * from this software without specific prior written permission. | |
19 | * | |
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
24 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
25 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
26 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
27 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
28 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
30 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
31 | */ | |
32 | ||
33 | #include <stdint.h> | |
34 | #include <stdio.h> | |
35 | #include <stdlib.h> | |
36 | #include <string.h> | |
37 | #include <errno.h> | |
38 | ||
39 | #include <rte_byteorder.h> | |
40 | #include <rte_branch_prediction.h> | |
41 | #include <rte_cycles.h> | |
42 | #include <rte_ether.h> | |
43 | #include <rte_ethdev.h> | |
44 | #include <rte_errno.h> | |
45 | #include <rte_memory.h> | |
46 | #include <rte_memzone.h> | |
47 | #include <rte_mempool.h> | |
48 | #include <rte_malloc.h> | |
49 | #include <rte_mbuf.h> | |
50 | #include <rte_prefetch.h> | |
51 | #include <rte_string_fns.h> | |
52 | #include <rte_vect.h> | |
53 | ||
54 | #include "virtio_rxtx_simple.h" | |
55 | ||
56 | #define RTE_VIRTIO_VPMD_RX_BURST 32 | |
57 | #define RTE_VIRTIO_DESC_PER_LOOP 8 | |
58 | #define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST | |
59 | ||
60 | /* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP) | |
61 | * | |
62 | * This routine is for non-mergeable RX, one desc for each guest buffer. | |
63 | * This routine is based on the RX ring layout optimization. Each entry in the | |
64 | * avail ring points to the desc with the same index in the desc ring and this | |
65 | * will never be changed in the driver. | |
66 | * | |
67 | * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet | |
68 | */ | |
69 | uint16_t | |
70 | virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, | |
71 | uint16_t nb_pkts) | |
72 | { | |
73 | struct virtnet_rx *rxvq = rx_queue; | |
74 | struct virtqueue *vq = rxvq->vq; | |
75 | uint16_t nb_used; | |
76 | uint16_t desc_idx; | |
77 | struct vring_used_elem *rused; | |
78 | struct rte_mbuf **sw_ring; | |
79 | struct rte_mbuf **sw_ring_end; | |
80 | uint16_t nb_pkts_received; | |
81 | ||
82 | uint8x16_t shuf_msk1 = { | |
83 | 0xFF, 0xFF, 0xFF, 0xFF, /* packet type */ | |
84 | 4, 5, 0xFF, 0xFF, /* pkt len */ | |
85 | 4, 5, /* dat len */ | |
86 | 0xFF, 0xFF, /* vlan tci */ | |
87 | 0xFF, 0xFF, 0xFF, 0xFF | |
88 | }; | |
89 | ||
90 | uint8x16_t shuf_msk2 = { | |
91 | 0xFF, 0xFF, 0xFF, 0xFF, /* packet type */ | |
92 | 12, 13, 0xFF, 0xFF, /* pkt len */ | |
93 | 12, 13, /* dat len */ | |
94 | 0xFF, 0xFF, /* vlan tci */ | |
95 | 0xFF, 0xFF, 0xFF, 0xFF | |
96 | }; | |
97 | ||
98 | /* Subtract the header length. | |
99 | * In which case do we need the header length in used->len ? | |
100 | */ | |
101 | uint16x8_t len_adjust = { | |
102 | 0, 0, | |
103 | (uint16_t)vq->hw->vtnet_hdr_size, 0, | |
104 | (uint16_t)vq->hw->vtnet_hdr_size, | |
105 | 0, | |
106 | 0, 0 | |
107 | }; | |
108 | ||
109 | if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP)) | |
110 | return 0; | |
111 | ||
112 | nb_used = VIRTQUEUE_NUSED(vq); | |
113 | ||
114 | rte_rmb(); | |
115 | ||
116 | if (unlikely(nb_used == 0)) | |
117 | return 0; | |
118 | ||
119 | nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP); | |
120 | nb_used = RTE_MIN(nb_used, nb_pkts); | |
121 | ||
122 | desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); | |
123 | rused = &vq->vq_ring.used->ring[desc_idx]; | |
124 | sw_ring = &vq->sw_ring[desc_idx]; | |
125 | sw_ring_end = &vq->sw_ring[vq->vq_nentries]; | |
126 | ||
127 | rte_prefetch_non_temporal(rused); | |
128 | ||
129 | if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { | |
130 | virtio_rxq_rearm_vec(rxvq); | |
131 | if (unlikely(virtqueue_kick_prepare(vq))) | |
132 | virtqueue_notify(vq); | |
133 | } | |
134 | ||
135 | for (nb_pkts_received = 0; | |
136 | nb_pkts_received < nb_used;) { | |
137 | uint64x2_t desc[RTE_VIRTIO_DESC_PER_LOOP / 2]; | |
138 | uint64x2_t mbp[RTE_VIRTIO_DESC_PER_LOOP / 2]; | |
139 | uint64x2_t pkt_mb[RTE_VIRTIO_DESC_PER_LOOP]; | |
140 | ||
141 | mbp[0] = vld1q_u64((uint64_t *)(sw_ring + 0)); | |
142 | desc[0] = vld1q_u64((uint64_t *)(rused + 0)); | |
143 | vst1q_u64((uint64_t *)&rx_pkts[0], mbp[0]); | |
144 | ||
145 | mbp[1] = vld1q_u64((uint64_t *)(sw_ring + 2)); | |
146 | desc[1] = vld1q_u64((uint64_t *)(rused + 2)); | |
147 | vst1q_u64((uint64_t *)&rx_pkts[2], mbp[1]); | |
148 | ||
149 | mbp[2] = vld1q_u64((uint64_t *)(sw_ring + 4)); | |
150 | desc[2] = vld1q_u64((uint64_t *)(rused + 4)); | |
151 | vst1q_u64((uint64_t *)&rx_pkts[4], mbp[2]); | |
152 | ||
153 | mbp[3] = vld1q_u64((uint64_t *)(sw_ring + 6)); | |
154 | desc[3] = vld1q_u64((uint64_t *)(rused + 6)); | |
155 | vst1q_u64((uint64_t *)&rx_pkts[6], mbp[3]); | |
156 | ||
157 | pkt_mb[1] = vreinterpretq_u64_u8(vqtbl1q_u8( | |
158 | vreinterpretq_u8_u64(desc[0]), shuf_msk2)); | |
159 | pkt_mb[0] = vreinterpretq_u64_u8(vqtbl1q_u8( | |
160 | vreinterpretq_u8_u64(desc[0]), shuf_msk1)); | |
161 | pkt_mb[1] = vreinterpretq_u64_u16(vsubq_u16( | |
162 | vreinterpretq_u16_u64(pkt_mb[1]), len_adjust)); | |
163 | pkt_mb[0] = vreinterpretq_u64_u16(vsubq_u16( | |
164 | vreinterpretq_u16_u64(pkt_mb[0]), len_adjust)); | |
165 | vst1q_u64((void *)&rx_pkts[1]->rx_descriptor_fields1, | |
166 | pkt_mb[1]); | |
167 | vst1q_u64((void *)&rx_pkts[0]->rx_descriptor_fields1, | |
168 | pkt_mb[0]); | |
169 | ||
170 | pkt_mb[3] = vreinterpretq_u64_u8(vqtbl1q_u8( | |
171 | vreinterpretq_u8_u64(desc[1]), shuf_msk2)); | |
172 | pkt_mb[2] = vreinterpretq_u64_u8(vqtbl1q_u8( | |
173 | vreinterpretq_u8_u64(desc[1]), shuf_msk1)); | |
174 | pkt_mb[3] = vreinterpretq_u64_u16(vsubq_u16( | |
175 | vreinterpretq_u16_u64(pkt_mb[3]), len_adjust)); | |
176 | pkt_mb[2] = vreinterpretq_u64_u16(vsubq_u16( | |
177 | vreinterpretq_u16_u64(pkt_mb[2]), len_adjust)); | |
178 | vst1q_u64((void *)&rx_pkts[3]->rx_descriptor_fields1, | |
179 | pkt_mb[3]); | |
180 | vst1q_u64((void *)&rx_pkts[2]->rx_descriptor_fields1, | |
181 | pkt_mb[2]); | |
182 | ||
183 | pkt_mb[5] = vreinterpretq_u64_u8(vqtbl1q_u8( | |
184 | vreinterpretq_u8_u64(desc[2]), shuf_msk2)); | |
185 | pkt_mb[4] = vreinterpretq_u64_u8(vqtbl1q_u8( | |
186 | vreinterpretq_u8_u64(desc[2]), shuf_msk1)); | |
187 | pkt_mb[5] = vreinterpretq_u64_u16(vsubq_u16( | |
188 | vreinterpretq_u16_u64(pkt_mb[5]), len_adjust)); | |
189 | pkt_mb[4] = vreinterpretq_u64_u16(vsubq_u16( | |
190 | vreinterpretq_u16_u64(pkt_mb[4]), len_adjust)); | |
191 | vst1q_u64((void *)&rx_pkts[5]->rx_descriptor_fields1, | |
192 | pkt_mb[5]); | |
193 | vst1q_u64((void *)&rx_pkts[4]->rx_descriptor_fields1, | |
194 | pkt_mb[4]); | |
195 | ||
196 | pkt_mb[7] = vreinterpretq_u64_u8(vqtbl1q_u8( | |
197 | vreinterpretq_u8_u64(desc[3]), shuf_msk2)); | |
198 | pkt_mb[6] = vreinterpretq_u64_u8(vqtbl1q_u8( | |
199 | vreinterpretq_u8_u64(desc[3]), shuf_msk1)); | |
200 | pkt_mb[7] = vreinterpretq_u64_u16(vsubq_u16( | |
201 | vreinterpretq_u16_u64(pkt_mb[7]), len_adjust)); | |
202 | pkt_mb[6] = vreinterpretq_u64_u16(vsubq_u16( | |
203 | vreinterpretq_u16_u64(pkt_mb[6]), len_adjust)); | |
204 | vst1q_u64((void *)&rx_pkts[7]->rx_descriptor_fields1, | |
205 | pkt_mb[7]); | |
206 | vst1q_u64((void *)&rx_pkts[6]->rx_descriptor_fields1, | |
207 | pkt_mb[6]); | |
208 | ||
209 | if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) { | |
210 | if (sw_ring + nb_used <= sw_ring_end) | |
211 | nb_pkts_received += nb_used; | |
212 | else | |
213 | nb_pkts_received += sw_ring_end - sw_ring; | |
214 | break; | |
215 | } else { | |
216 | if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >= | |
217 | sw_ring_end)) { | |
218 | nb_pkts_received += sw_ring_end - sw_ring; | |
219 | break; | |
220 | } else { | |
221 | nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP; | |
222 | ||
223 | rx_pkts += RTE_VIRTIO_DESC_PER_LOOP; | |
224 | sw_ring += RTE_VIRTIO_DESC_PER_LOOP; | |
225 | rused += RTE_VIRTIO_DESC_PER_LOOP; | |
226 | nb_used -= RTE_VIRTIO_DESC_PER_LOOP; | |
227 | } | |
228 | } | |
229 | } | |
230 | ||
231 | vq->vq_used_cons_idx += nb_pkts_received; | |
232 | vq->vq_free_cnt += nb_pkts_received; | |
233 | rxvq->stats.packets += nb_pkts_received; | |
234 | return nb_pkts_received; | |
235 | } |