/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */

#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include <linux/virtio_net.h>

#include <rte_mbuf.h>
#include <rte_memcpy.h>
#include <rte_vhost.h>

#include "main.h"
/*
 * A very simple vhost-user net driver implementation, without
 * any extra features being enabled, such as TSO and mrg-Rx.
 */
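/*
 * The device state used below lives in the example's main.h. As a
 * rough sketch (an assumption for illustration; the real structs in
 * main.h carry more members), the fields this file relies on are:
 *
 *	struct vhost_queue {
 *		struct rte_vhost_vring vr;
 *		uint16_t last_avail_idx;
 *		uint16_t last_used_idx;
 *	};
 *
 *	struct vhost_dev {
 *		int vid;
 *		uint64_t features;
 *		size_t hdr_len;
 *		uint16_t nr_vrings;
 *		struct rte_vhost_memory *mem;
 *		struct vhost_queue queues[...];
 *		...
 *	};
 */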
void
vs_vhost_net_setup(struct vhost_dev *dev)
{
	uint16_t i;
	int vid = dev->vid;
	struct vhost_queue *queue;

	RTE_LOG(INFO, VHOST_CONFIG,
		"setting builtin vhost-user net driver\n");

	rte_vhost_get_negotiated_features(vid, &dev->features);
	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
		dev->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	else
		dev->hdr_len = sizeof(struct virtio_net_hdr);

	rte_vhost_get_mem_table(vid, &dev->mem);

	dev->nr_vrings = rte_vhost_get_vring_num(vid);
	for (i = 0; i < dev->nr_vrings; i++) {
		queue = &dev->queues[i];

		queue->last_used_idx  = 0;
		queue->last_avail_idx = 0;
		rte_vhost_get_vhost_vring(vid, i, &queue->vr);
	}
}
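/*
 * Usage note (an assumption about the surrounding application): in the
 * vhost example, vs_vhost_net_setup() is expected to run once from the
 * new_device() vhost-user callback, after feature negotiation, and
 * vs_vhost_net_remove() from the matching destroy_device() callback.
 */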
void
vs_vhost_net_remove(struct vhost_dev *dev)
{
	/* Release the guest memory table fetched in vs_vhost_net_setup(). */
	free(dev->mem);
}
static __rte_always_inline int
enqueue_pkt(struct vhost_dev *dev, struct rte_vhost_vring *vr,
	    struct rte_mbuf *m, uint16_t desc_idx)
{
	uint32_t desc_avail, desc_offset;
	uint64_t desc_chunck_len;
	uint32_t mbuf_avail, mbuf_offset;
	uint32_t cpy_len;
	struct vring_desc *desc;
	uint64_t desc_addr, desc_gaddr;
	struct virtio_net_hdr virtio_hdr = {0, 0, 0, 0, 0, 0};
	/* A counter to avoid desc dead loop chain */
	uint16_t nr_desc = 1;

	desc = &vr->desc[desc_idx];
	desc_chunck_len = desc->len;
	desc_gaddr = desc->addr;
	desc_addr = rte_vhost_va_from_guest_pa(
			dev->mem, desc_gaddr, &desc_chunck_len);
	/*
	 * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
	 * performance issue with some versions of gcc (4.8.4 and 5.3.0) which
	 * otherwise stores offset on the stack instead of in a register.
	 */
	if (unlikely(desc->len < dev->hdr_len) || !desc_addr)
		return -1;

	rte_prefetch0((void *)(uintptr_t)desc_addr);

	/* write virtio-net header */
	if (likely(desc_chunck_len >= dev->hdr_len)) {
		*(struct virtio_net_hdr *)(uintptr_t)desc_addr = virtio_hdr;
		desc_offset = dev->hdr_len;
	} else {
		/*
		 * The header spans a guest-physical chunk boundary: copy it
		 * piecewise, re-translating the guest address for each
		 * host-contiguous chunk.
		 */
		uint64_t len;
		uint64_t remain = dev->hdr_len;
		uint64_t src = (uint64_t)(uintptr_t)&virtio_hdr, dst;
		uint64_t guest_addr = desc_gaddr;

		while (remain) {
			len = remain;
			dst = rte_vhost_va_from_guest_pa(dev->mem,
					guest_addr, &len);
			if (unlikely(!dst || !len))
				return -1;

			rte_memcpy((void *)(uintptr_t)dst,
					(void *)(uintptr_t)src,
					len);

			remain -= len;
			guest_addr += len;
			src += len;
		}

		desc_chunck_len = desc->len - dev->hdr_len;
		desc_gaddr += dev->hdr_len;
		desc_addr = rte_vhost_va_from_guest_pa(
				dev->mem, desc_gaddr,
				&desc_chunck_len);
		if (unlikely(!desc_addr))
			return -1;

		desc_offset = 0;
	}

	desc_avail = desc->len - dev->hdr_len;

	mbuf_avail = rte_pktmbuf_data_len(m);
	mbuf_offset = 0;
	while (mbuf_avail != 0 || m->next != NULL) {
		/* done with current mbuf, fetch next */
		if (mbuf_avail == 0) {
			m = m->next;

			mbuf_offset = 0;
			mbuf_avail = rte_pktmbuf_data_len(m);
		}

		/* done with current desc buf, fetch next */
		if (desc_avail == 0) {
			if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
				/* Room in vring buffer is not enough */
				return -1;
			}
			if (unlikely(desc->next >= vr->size ||
					++nr_desc > vr->size))
				return -1;

			desc = &vr->desc[desc->next];
			desc_chunck_len = desc->len;
			desc_gaddr = desc->addr;
			desc_addr = rte_vhost_va_from_guest_pa(
					dev->mem, desc_gaddr, &desc_chunck_len);
			if (unlikely(!desc_addr))
				return -1;

			desc_offset = 0;
			desc_avail = desc->len;
		} else if (unlikely(desc_chunck_len == 0)) {
			/* Current host-contiguous chunk is exhausted:
			 * translate the rest of this descriptor. */
			desc_chunck_len = desc_avail;
			desc_gaddr += desc_offset;
			desc_addr = rte_vhost_va_from_guest_pa(dev->mem,
					desc_gaddr,
					&desc_chunck_len);
			if (unlikely(!desc_addr))
				return -1;

			desc_offset = 0;
		}

		cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
		rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
			rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
			cpy_len);

		mbuf_avail -= cpy_len;
		mbuf_offset += cpy_len;
		desc_avail -= cpy_len;
		desc_offset += cpy_len;
		desc_chunck_len -= cpy_len;
	}

	return 0;
}
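/*
 * Burst Rx path (host to guest): harvest up to a burst of head indexes
 * from the avail ring and pre-write the matching used-ring entries,
 * copy each packet via enqueue_pkt(), and only then publish the new
 * used index, so the guest never observes a partially written entry.
 */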
uint16_t
vs_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
		struct rte_mbuf **pkts, uint32_t count)
{
	struct vhost_queue *queue;
	struct rte_vhost_vring *vr;
	uint16_t avail_idx, free_entries, start_idx;
	uint16_t desc_indexes[MAX_PKT_BURST];
	uint16_t used_idx;
	uint32_t i;

	queue = &dev->queues[queue_id];
	vr = &queue->vr;

	avail_idx = *((volatile uint16_t *)&vr->avail->idx);
	start_idx = queue->last_used_idx;
	free_entries = avail_idx - start_idx;
	count = RTE_MIN(count, free_entries);
	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
	if (count == 0)
		return 0;

	/* Retrieve all of the desc indexes first to avoid caching issues. */
	rte_prefetch0(&vr->avail->ring[start_idx & (vr->size - 1)]);
	for (i = 0; i < count; i++) {
		used_idx = (start_idx + i) & (vr->size - 1);
		desc_indexes[i] = vr->avail->ring[used_idx];
		vr->used->ring[used_idx].id = desc_indexes[i];
		vr->used->ring[used_idx].len = pkts[i]->pkt_len +
					       dev->hdr_len;
	}

	rte_prefetch0(&vr->desc[desc_indexes[0]]);
	for (i = 0; i < count; i++) {
		uint16_t desc_idx = desc_indexes[i];
		int err;

		err = enqueue_pkt(dev, vr, pkts[i], desc_idx);
		if (unlikely(err)) {
			used_idx = (start_idx + i) & (vr->size - 1);
			vr->used->ring[used_idx].len = dev->hdr_len;
		}

		if (i + 1 < count)
			rte_prefetch0(&vr->desc[desc_indexes[i + 1]]);
	}

	/* Ensure all copies land before the used index is published. */
	rte_smp_wmb();

	*(volatile uint16_t *)&vr->used->idx += count;
	queue->last_used_idx += count;

	rte_vhost_vring_call(dev->vid, queue_id);

	return count;
}
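/*
 * Ownership note: vs_enqueue_pkts() copies packet data into guest
 * memory, so the caller still owns the mbufs in 'pkts' and is expected
 * to free them afterwards. When enqueue_pkt() fails, the pre-written
 * used entry is shrunk to the header length, so the guest sees a
 * header-only (empty) packet instead of stale descriptor contents.
 */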
static __rte_always_inline int
dequeue_pkt(struct vhost_dev *dev, struct rte_vhost_vring *vr,
	    struct rte_mbuf *m, uint16_t desc_idx,
	    struct rte_mempool *mbuf_pool)
{
	struct vring_desc *desc;
	uint64_t desc_addr, desc_gaddr;
	uint32_t desc_avail, desc_offset;
	uint64_t desc_chunck_len;
	uint32_t mbuf_avail, mbuf_offset;
	uint32_t cpy_len;
	struct rte_mbuf *cur = m, *prev = m;
	/* A counter to avoid desc dead loop chain */
	uint32_t nr_desc = 1;

	desc = &vr->desc[desc_idx];
	if (unlikely((desc->len < dev->hdr_len)) ||
			(desc->flags & VRING_DESC_F_INDIRECT))
		return -1;

	desc_chunck_len = desc->len;
	desc_gaddr = desc->addr;
	desc_addr = rte_vhost_va_from_guest_pa(
			dev->mem, desc_gaddr, &desc_chunck_len);
	if (unlikely(!desc_addr))
		return -1;

	/*
	 * We don't support ANY_LAYOUT nor VERSION_1, meaning
	 * a Tx packet from guest must have 2 desc buffers at least:
	 * the first for storing the header and the others for
	 * storing the data.
	 *
	 * And since we don't support TSO, we could simply skip the
	 * header.
	 */
	desc = &vr->desc[desc->next];
	desc_chunck_len = desc->len;
	desc_gaddr = desc->addr;
	desc_addr = rte_vhost_va_from_guest_pa(
			dev->mem, desc_gaddr, &desc_chunck_len);
	if (unlikely(!desc_addr))
		return -1;
	rte_prefetch0((void *)(uintptr_t)desc_addr);

	desc_offset = 0;
	desc_avail = desc->len;
	nr_desc += 1;

	mbuf_offset = 0;
	mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
	while (1) {
		cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
		rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
						   mbuf_offset),
			(void *)((uintptr_t)(desc_addr + desc_offset)),
			cpy_len);

		mbuf_avail -= cpy_len;
		mbuf_offset += cpy_len;
		desc_avail -= cpy_len;
		desc_offset += cpy_len;
		desc_chunck_len -= cpy_len;

		/* This desc reaches its end, get the next one */
		if (desc_avail == 0) {
			if ((desc->flags & VRING_DESC_F_NEXT) == 0)
				break;

			if (unlikely(desc->next >= vr->size ||
					++nr_desc > vr->size))
				return -1;
			desc = &vr->desc[desc->next];

			desc_chunck_len = desc->len;
			desc_gaddr = desc->addr;
			desc_addr = rte_vhost_va_from_guest_pa(
					dev->mem, desc_gaddr, &desc_chunck_len);
			if (unlikely(!desc_addr))
				return -1;
			rte_prefetch0((void *)(uintptr_t)desc_addr);

			desc_offset = 0;
			desc_avail = desc->len;
		} else if (unlikely(desc_chunck_len == 0)) {
			desc_chunck_len = desc_avail;
			desc_gaddr += desc_offset;
			desc_addr = rte_vhost_va_from_guest_pa(dev->mem,
					desc_gaddr,
					&desc_chunck_len);
			if (unlikely(!desc_addr))
				return -1;

			desc_offset = 0;
		}

		/*
		 * This mbuf reaches its end, get a new one
		 * to hold more data.
		 */
		if (mbuf_avail == 0) {
			cur = rte_pktmbuf_alloc(mbuf_pool);
			if (unlikely(cur == NULL)) {
				RTE_LOG(ERR, VHOST_DATA, "Failed to "
					"allocate memory for mbuf.\n");
				return -1;
			}

			prev->next = cur;
			prev->data_len = mbuf_offset;
			m->nb_segs += 1;
			m->pkt_len += mbuf_offset;
			prev = cur;

			mbuf_offset = 0;
			mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
		}
	}

	prev->data_len = mbuf_offset;
	m->pkt_len += mbuf_offset;

	return 0;
}
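/*
 * Burst Tx path (guest to host): head indexes are harvested from the
 * avail ring with their used-ring entries pre-written as zero length
 * (nothing is written back into guest Tx buffers); each descriptor
 * chain is then copied into a freshly allocated mbuf chain.
 */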
uint16_t
vs_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
{
	struct vhost_queue *queue;
	struct rte_vhost_vring *vr;
	uint32_t desc_indexes[MAX_PKT_BURST];
	uint32_t used_idx;
	uint32_t i = 0;
	uint16_t free_entries;
	uint16_t avail_idx;

	queue = &dev->queues[queue_id];
	vr = &queue->vr;

	free_entries = *((volatile uint16_t *)&vr->avail->idx) -
			queue->last_avail_idx;
	if (free_entries == 0)
		return 0;

	/* Prefetch available and used ring */
	avail_idx = queue->last_avail_idx & (vr->size - 1);
	used_idx = queue->last_used_idx & (vr->size - 1);
	rte_prefetch0(&vr->avail->ring[avail_idx]);
	rte_prefetch0(&vr->used->ring[used_idx]);

	count = RTE_MIN(count, MAX_PKT_BURST);
	count = RTE_MIN(count, free_entries);

	if (unlikely(count == 0))
		return 0;

	/*
	 * Retrieve all of the head indexes first and pre-update used entries
	 * to avoid caching issues.
	 */
	for (i = 0; i < count; i++) {
		avail_idx = (queue->last_avail_idx + i) & (vr->size - 1);
		used_idx = (queue->last_used_idx + i) & (vr->size - 1);
		desc_indexes[i] = vr->avail->ring[avail_idx];

		vr->used->ring[used_idx].id = desc_indexes[i];
		vr->used->ring[used_idx].len = 0;
	}

	/* Prefetch descriptor index. */
	rte_prefetch0(&vr->desc[desc_indexes[0]]);
	for (i = 0; i < count; i++) {
		int err;

		if (likely(i + 1 < count))
			rte_prefetch0(&vr->desc[desc_indexes[i + 1]]);

		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
		if (unlikely(pkts[i] == NULL)) {
			RTE_LOG(ERR, VHOST_DATA,
				"Failed to allocate memory for mbuf.\n");
			break;
		}

		err = dequeue_pkt(dev, vr, pkts[i], desc_indexes[i], mbuf_pool);
		if (unlikely(err)) {
			rte_pktmbuf_free(pkts[i]);
			break;
		}
	}

	/* Only the first 'i' entries were successfully dequeued. */
	queue->last_avail_idx += i;
	queue->last_used_idx += i;

	/* Ensure used-ring writes land before the index update. */
	rte_smp_wmb();
	rte_smp_rmb();

	vr->used->idx += i;

	rte_vhost_vring_call(dev->vid, queue_id);

	return i;
}
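/*
 * A minimal usage sketch (not part of the original driver; an
 * illustrative assumption): drain a burst the guest transmitted and
 * echo it straight back into its Rx queue. Assumes the VIRTIO_RXQ and
 * VIRTIO_TXQ queue ids from main.h and a device already initialized
 * via vs_vhost_net_setup().
 */
static __rte_unused void
vs_echo_burst(struct vhost_dev *vdev, struct rte_mempool *mbuf_pool)
{
	struct rte_mbuf *pkts[MAX_PKT_BURST];
	uint16_t nr, i;

	/* Guest Tx descriptors are copied into host mbufs. */
	nr = vs_dequeue_pkts(vdev, VIRTIO_TXQ, mbuf_pool, pkts,
			     MAX_PKT_BURST);

	/* Host mbufs are copied back into the guest's Rx queue. */
	vs_enqueue_pkts(vdev, VIRTIO_RXQ, pkts, nr);

	/* Enqueue copies the data, so we still own and free the mbufs. */
	for (i = 0; i < nr; i++)
		rte_pktmbuf_free(pkts[i]);
}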