]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*- |
2 | * BSD LICENSE | |
3 | * | |
4 | * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. | |
5 | * All rights reserved. | |
6 | * | |
7 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions | |
9 | * are met: | |
10 | * | |
11 | * * Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * * Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in | |
15 | * the documentation and/or other materials provided with the | |
16 | * distribution. | |
17 | * * Neither the name of Intel Corporation nor the names of its | |
18 | * contributors may be used to endorse or promote products derived | |
19 | * from this software without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
32 | */ | |
33 | ||
34 | #ifndef _VHOST_NET_CDEV_H_ | |
35 | #define _VHOST_NET_CDEV_H_ | |
36 | #include <stdint.h> | |
37 | #include <stdio.h> | |
38 | #include <sys/types.h> | |
39 | #include <sys/queue.h> | |
40 | #include <unistd.h> | |
41 | #include <linux/vhost.h> | |
42 | ||
43 | #include <rte_log.h> | |
44 | ||
45 | #include "rte_virtio_net.h" | |
46 | ||
/* Used to indicate that the device is running on a data core */
#define VIRTIO_DEV_RUNNING 1

/*
 * Backend value set by guest.
 * Parenthesised so the negative constant expands safely inside any
 * surrounding expression.
 */
#define VIRTIO_DEV_STOPPED (-1)

/* NOTE(review): presumably the capacity of buf_vector arrays - confirm at the use sites. */
#define BUF_VECTOR_MAX 256
54 | ||
/**
 * One entry of a scatter RX buffer vector: the address and length of a
 * buffer together with the index of the vring descriptor it came from.
 */
struct buf_vector {
	uint64_t buf_addr;	/* buffer address */
	uint32_t buf_len;	/* buffer length */
	uint32_t desc_idx;	/* index of the originating vring descriptor */
};
64 | ||
/*
 * Bookkeeping record for the zero copy code path. Its main job is to tie
 * an mbuf to the desc_idx it belongs to.
 */
struct zcopy_mbuf {
	struct rte_mbuf *mbuf;		/* mbuf tracked by this record */
	uint32_t desc_idx;		/* descriptor index paired with mbuf */
	uint16_t in_use;		/* in-use marker for this record */

	TAILQ_ENTRY(zcopy_mbuf) next;	/* linkage within a zcopy_mbuf_list */
};

/* Tail queue head type for lists of struct zcopy_mbuf. */
TAILQ_HEAD(zcopy_mbuf_list, zcopy_mbuf);
77 | ||
78 | /** | |
79 | * Structure contains variables relevant to RX/TX virtqueues. | |
80 | */ | |
81 | struct vhost_virtqueue { | |
82 | struct vring_desc *desc; | |
83 | struct vring_avail *avail; | |
84 | struct vring_used *used; | |
85 | uint32_t size; | |
86 | ||
87 | uint16_t last_avail_idx; | |
88 | uint16_t last_used_idx; | |
89 | #define VIRTIO_INVALID_EVENTFD (-1) | |
90 | #define VIRTIO_UNINITIALIZED_EVENTFD (-2) | |
91 | ||
92 | /* Backend value to determine if device should started/stopped */ | |
93 | int backend; | |
94 | /* Used to notify the guest (trigger interrupt) */ | |
95 | int callfd; | |
96 | /* Currently unused as polling mode is enabled */ | |
97 | int kickfd; | |
98 | int enabled; | |
99 | ||
100 | /* Physical address of used ring, for logging */ | |
101 | uint64_t log_guest_addr; | |
102 | ||
103 | uint16_t nr_zmbuf; | |
104 | uint16_t zmbuf_size; | |
105 | uint16_t last_zmbuf_idx; | |
106 | struct zcopy_mbuf *zmbufs; | |
107 | struct zcopy_mbuf_list zmbuf_list; | |
108 | ||
109 | struct vring_used_elem *shadow_used_ring; | |
110 | uint16_t shadow_used_idx; | |
111 | } __rte_cache_aligned; | |
112 | ||
/* Fallback for old kernels whose headers lack this feature bit. */
#if !defined(VIRTIO_NET_F_GUEST_ANNOUNCE)
#define VIRTIO_NET_F_GUEST_ANNOUNCE 21
#endif

/*
 * VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX only exist since
 * kernel v3.8. Wrap them so the code still builds against older kernel
 * headers, falling back to a single queue pair without MQ support.
 */
#if defined(VIRTIO_NET_F_MQ)
#define VHOST_MAX_QUEUE_PAIRS VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX
#define VHOST_SUPPORTS_MQ (1ULL << VIRTIO_NET_F_MQ)
#else
#define VHOST_MAX_QUEUE_PAIRS 1
#define VHOST_SUPPORTS_MQ 0
#endif

/* Provide the virtio 1.0 feature bit for older kernels. */
#if !defined(VIRTIO_F_VERSION_1)
#define VIRTIO_F_VERSION_1 32
#endif
139 | ||
/*
 * Describes one guest page: where it starts in guest physical address
 * space, the host physical address it corresponds to, and its size.
 */
struct guest_page {
	uint64_t guest_phys_addr;	/* start of the page in guest physical space */
	uint64_t host_phys_addr;	/* host physical address of the page start */
	uint64_t size;			/* size of the page */
};
145 | ||
146 | /** | |
147 | * Device structure contains all configuration information relating | |
148 | * to the device. | |
149 | */ | |
150 | struct virtio_net { | |
151 | /* Frontend (QEMU) memory and memory region information */ | |
152 | struct virtio_memory *mem; | |
153 | uint64_t features; | |
154 | uint64_t protocol_features; | |
155 | int vid; | |
156 | uint32_t flags; | |
157 | uint16_t vhost_hlen; | |
158 | /* to tell if we need broadcast rarp packet */ | |
159 | rte_atomic16_t broadcast_rarp; | |
160 | uint32_t virt_qp_nb; | |
161 | int dequeue_zero_copy; | |
162 | struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; | |
163 | #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) | |
164 | char ifname[IF_NAME_SZ]; | |
165 | uint64_t log_size; | |
166 | uint64_t log_base; | |
167 | uint64_t log_addr; | |
168 | struct ether_addr mac; | |
169 | ||
170 | uint32_t nr_guest_pages; | |
171 | uint32_t max_guest_pages; | |
172 | struct guest_page *guest_pages; | |
173 | } __rte_cache_aligned; | |
174 | ||
/**
 * Information relating to one memory region, including offsets to
 * addresses in QEMU's memory file.
 */
struct virtio_memory_region {
	uint64_t guest_phys_addr;
	uint64_t guest_user_addr;
	uint64_t host_user_addr;
	uint64_t size;
	void *mmap_addr;
	uint64_t mmap_size;
	int fd;
};


/**
 * Memory structure includes region and mapping information.
 *
 * regions is a C99 flexible array member holding nregions entries, so the
 * structure must be allocated with room for the entries it will carry.
 */
struct virtio_memory {
	uint32_t nregions;
	struct virtio_memory_region regions[];
};
197 | ||
198 | ||
/* Macros for printing using RTE_LOG */
#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
#define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER1

#ifdef RTE_LIBRTE_VHOST_DEBUG
#define VHOST_MAX_PRINT_BUFF 6072
#define LOG_LEVEL RTE_LOG_DEBUG
#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
/*
 * Format a one-line hex dump of `size` bytes starting at `addr` and emit it
 * through LOG_DEBUG. The line starts with the device vid and either
 * "Header size" or "Packet size", selected by `header`. Output that does
 * not fit in VHOST_MAX_PRINT_BUFF bytes is silently truncated.
 *
 * The write offset is tracked in pkt_off so the buffer is not rescanned
 * with strnlen() for every dumped byte.
 */
#define PRINT_PACKET(device, addr, size, header) do { \
	const char *pkt_addr = (const char *)(addr); \
	unsigned int index; \
	char packet[VHOST_MAX_PRINT_BUFF]; \
	size_t pkt_off; \
	int pkt_n; \
	\
	if ((header)) \
		snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Header size %d: ", ((device)->vid), (size)); \
	else \
		snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Packet size %d: ", ((device)->vid), (size)); \
	pkt_off = strnlen(packet, VHOST_MAX_PRINT_BUFF); \
	/* Stop once only the terminating NUL would still fit. */ \
	for (index = 0; index < (size) && pkt_off < VHOST_MAX_PRINT_BUFF - 1; index++) { \
		pkt_n = snprintf(packet + pkt_off, VHOST_MAX_PRINT_BUFF - pkt_off, \
			"%02hhx ", pkt_addr[index]); \
		if (pkt_n < 0) \
			break; \
		pkt_off += (size_t)pkt_n; \
		/* snprintf reports the untruncated length; clamp to what was stored. */ \
		if (pkt_off > VHOST_MAX_PRINT_BUFF - 1) \
			pkt_off = VHOST_MAX_PRINT_BUFF - 1; \
	} \
	snprintf(packet + pkt_off, VHOST_MAX_PRINT_BUFF - pkt_off, "\n"); \
	\
	LOG_DEBUG(VHOST_DATA, "%s", packet); \
} while (0)
#else
/* Debug output disabled: both macros expand to empty statements. */
#define LOG_LEVEL RTE_LOG_INFO
#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
#define PRINT_PACKET(device, addr, size, header) do {} while (0)
#endif
229 | ||
/* Number of slots in the vhost_devices[] table. */
#define MAX_VHOST_DEVICE 1024

/* Declarations only: storage for both objects is provided elsewhere. */
extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
extern uint64_t VHOST_FEATURES;
233 | ||
234 | /* Convert guest physical Address to host virtual address */ | |
235 | static inline uint64_t __attribute__((always_inline)) | |
236 | gpa_to_vva(struct virtio_net *dev, uint64_t gpa) | |
237 | { | |
238 | struct virtio_memory_region *reg; | |
239 | uint32_t i; | |
240 | ||
241 | for (i = 0; i < dev->mem->nregions; i++) { | |
242 | reg = &dev->mem->regions[i]; | |
243 | if (gpa >= reg->guest_phys_addr && | |
244 | gpa < reg->guest_phys_addr + reg->size) { | |
245 | return gpa - reg->guest_phys_addr + | |
246 | reg->host_user_addr; | |
247 | } | |
248 | } | |
249 | ||
250 | return 0; | |
251 | } | |
252 | ||
253 | /* Convert guest physical address to host physical address */ | |
254 | static inline phys_addr_t __attribute__((always_inline)) | |
255 | gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size) | |
256 | { | |
257 | uint32_t i; | |
258 | struct guest_page *page; | |
259 | ||
260 | for (i = 0; i < dev->nr_guest_pages; i++) { | |
261 | page = &dev->guest_pages[i]; | |
262 | ||
263 | if (gpa >= page->guest_phys_addr && | |
264 | gpa + size < page->guest_phys_addr + page->size) { | |
265 | return gpa - page->guest_phys_addr + | |
266 | page->host_phys_addr; | |
267 | } | |
268 | } | |
269 | ||
270 | return 0; | |
271 | } | |
272 | ||
/*
 * Declared extern so that including this header from several translation
 * units does not give each of them its own (tentative) definition.
 * NOTE(review): confirm that exactly one .c file defines notify_ops.
 */
extern struct virtio_net_device_ops const *notify_ops;

struct virtio_net *get_device(int vid);

int vhost_new_device(void);
void cleanup_device(struct virtio_net *dev, int destroy);
void reset_device(struct virtio_net *dev);
void vhost_destroy_device(int vid);

int alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx);

void vhost_set_ifname(int vid, const char *if_name, unsigned int if_len);
void vhost_enable_dequeue_zero_copy(int vid);

/*
 * Backend-specific cleanup.
 *
 * TODO: fix it; we have one backend now
 */
void vhost_backend_cleanup(struct virtio_net *dev);
292 | ||
293 | #endif /* _VHOST_NET_CDEV_H_ */ |