]>
Commit | Line | Data |
---|---|---|
3595e2eb VK |
1 | /* |
2 | * Vhost User Bridge | |
3 | * | |
4 | * Copyright (c) 2015 Red Hat, Inc. | |
5 | * | |
6 | * Authors: | |
7 | * Victor Kaplansky <victork@redhat.com> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or | |
10 | * later. See the COPYING file in the top-level directory. | |
11 | */ | |
12 | ||
13 | /* | |
14 | * TODO: | |
15 | * - main should get parameters from the command line. | |
5c93c473 VK |
16 | * - implement all request handlers. Still not implemented: |
17 | * vubr_get_queue_num_exec() | |
18 | * vubr_send_rarp_exec() | |
3595e2eb VK |
19 | * - test for broken requests and virtqueue. |
20 | * - implement features defined by Virtio 1.0 spec. | |
21 | * - support mergeable buffers and indirect descriptors. | |
3595e2eb VK |
22 | * - implement clean shutdown. |
23 | * - implement non-blocking writes to UDP backend. | |
24 | * - implement polling strategy. | |
5c93c473 VK |
25 | * - implement clean starting/stopping of vq processing |
26 | * - implement clean starting/stopping of dirty page logging for | |
27 | * the used ring and buffers. | |
3595e2eb VK |
28 | */ |
29 | ||
5c93c473 VK |
30 | #define _FILE_OFFSET_BITS 64 |
31 | ||
681c28a3 | 32 | #include "qemu/osdep.h" |
3595e2eb VK |
33 | #include <sys/socket.h> |
34 | #include <sys/un.h> | |
35 | #include <sys/unistd.h> | |
36 | #include <sys/mman.h> | |
37 | #include <sys/eventfd.h> | |
38 | #include <arpa/inet.h> | |
7cf32491 | 39 | #include <netdb.h> |
a28c393c | 40 | #include <qemu/osdep.h> |
3595e2eb VK |
41 | |
42 | #include <linux/vhost.h> | |
43 | ||
44 | #include "qemu/atomic.h" | |
45 | #include "standard-headers/linux/virtio_net.h" | |
46 | #include "standard-headers/linux/virtio_ring.h" | |
47 | ||
/* Non-zero enables the DPRINT() trace output below. */
#define VHOST_USER_BRIDGE_DEBUG 1

/*
 * printf()-style trace macro. The call is always compiled, so the
 * arguments stay type-checked, but it only prints while
 * VHOST_USER_BRIDGE_DEBUG is non-zero.
 */
#define DPRINT(...)                          \
    do {                                     \
        if (VHOST_USER_BRIDGE_DEBUG) {       \
            printf(__VA_ARGS__);             \
        }                                    \
    } while (0)
56 | ||
/* Handler invoked with the ready descriptor and its registered context. */
typedef void (*CallbackFunc)(int sock, void *ctx);

/* One registration: the handler and the opaque pointer passed to it. */
typedef struct Event {
    void *ctx;
    CallbackFunc callback;
} Event;

/*
 * State of the select()-based event loop.
 *   max_sock - highest descriptor ever added, -1 when none.
 *   fdset    - descriptors currently watched for readability.
 *   events   - registrations, indexed directly by descriptor number.
 */
typedef struct Dispatcher {
    int max_sock;
    fd_set fdset;
    Event events[FD_SETSIZE];
} Dispatcher;
69 | ||
/*
 * Report a fatal error and terminate.
 * @s is the prefix handed to perror(), which appends the text for the
 * current errno. Exits with status 1 and never returns.
 */
static void
vubr_die(const char *s)
{
    perror(s);

    exit(1);
}
76 | ||
77 | static int | |
78 | dispatcher_init(Dispatcher *dispr) | |
79 | { | |
80 | FD_ZERO(&dispr->fdset); | |
81 | dispr->max_sock = -1; | |
82 | return 0; | |
83 | } | |
84 | ||
85 | static int | |
86 | dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb) | |
87 | { | |
88 | if (sock >= FD_SETSIZE) { | |
89 | fprintf(stderr, | |
90 | "Error: Failed to add new event. sock %d should be less than %d\n", | |
91 | sock, FD_SETSIZE); | |
92 | return -1; | |
93 | } | |
94 | ||
95 | dispr->events[sock].ctx = ctx; | |
96 | dispr->events[sock].callback = cb; | |
97 | ||
98 | FD_SET(sock, &dispr->fdset); | |
99 | if (sock > dispr->max_sock) { | |
100 | dispr->max_sock = sock; | |
101 | } | |
102 | DPRINT("Added sock %d for watching. max_sock: %d\n", | |
103 | sock, dispr->max_sock); | |
104 | return 0; | |
105 | } | |
106 | ||
3595e2eb VK |
107 | /* dispatcher_remove() is not currently in use but may be useful |
108 | * in the future. */ | |
109 | static int | |
110 | dispatcher_remove(Dispatcher *dispr, int sock) | |
111 | { | |
112 | if (sock >= FD_SETSIZE) { | |
113 | fprintf(stderr, | |
114 | "Error: Failed to remove event. sock %d should be less than %d\n", | |
115 | sock, FD_SETSIZE); | |
116 | return -1; | |
117 | } | |
118 | ||
119 | FD_CLR(sock, &dispr->fdset); | |
6d0b908a | 120 | DPRINT("Sock %d removed from dispatcher watch.\n", sock); |
3595e2eb VK |
121 | return 0; |
122 | } | |
3595e2eb VK |
123 | |
124 | /* timeout in us */ | |
125 | static int | |
126 | dispatcher_wait(Dispatcher *dispr, uint32_t timeout) | |
127 | { | |
128 | struct timeval tv; | |
129 | tv.tv_sec = timeout / 1000000; | |
130 | tv.tv_usec = timeout % 1000000; | |
131 | ||
132 | fd_set fdset = dispr->fdset; | |
133 | ||
134 | /* wait until some of sockets become readable. */ | |
135 | int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv); | |
136 | ||
137 | if (rc == -1) { | |
138 | vubr_die("select"); | |
139 | } | |
140 | ||
141 | /* Timeout */ | |
142 | if (rc == 0) { | |
143 | return 0; | |
144 | } | |
145 | ||
146 | /* Now call callback for every ready socket. */ | |
147 | ||
148 | int sock; | |
6d0b908a VK |
149 | for (sock = 0; sock < dispr->max_sock + 1; sock++) { |
150 | /* The callback on a socket can remove other sockets from the | |
151 | * dispatcher, thus we have to check that the socket is | |
152 | * still not removed from dispatcher's list | |
153 | */ | |
154 | if (FD_ISSET(sock, &fdset) && FD_ISSET(sock, &dispr->fdset)) { | |
3595e2eb VK |
155 | Event *e = &dispr->events[sock]; |
156 | e->callback(sock, e->ctx); | |
157 | } | |
6d0b908a | 158 | } |
3595e2eb VK |
159 | |
160 | return 0; | |
161 | } | |
162 | ||
/* Per-virtqueue state kept by the bridge. */
typedef struct VubrVirtq {
    /* eventfd written to notify the guest (see vubr_virtqueue_kick()). */
    int call_fd;
    /* NOTE(review): presumably the eventfd the guest signals; its setup
     * is outside this part of the file. */
    int kick_fd;
    /* Number of ring entries, set by VHOST_USER_SET_VRING_NUM. */
    uint32_t size;
    /* Free-running position of the next avail entry to consume. */
    uint16_t last_avail_index;
    /* Free-running position of the next used entry to fill. */
    uint16_t last_used_index;
    /* Ring pointers in our address space (VHOST_USER_SET_VRING_ADDR). */
    struct vring_desc *desc;
    struct vring_avail *avail;
    struct vring_used *used;
    /* Base address passed to vubr_log_write() for used-ring updates. */
    uint64_t log_guest_addr;
    /* Not referenced in the visible part of the file. */
    int enable;
} VubrVirtq;
175 | ||
176 | /* Based on qemu/hw/virtio/vhost-user.c */ | |
177 | ||
/* Capacity of the region and descriptor arrays carried by one message. */
#define VHOST_MEMORY_MAX_NREGIONS 8
/* Feature bit reported by vubr_get_features_exec(). */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
/* v1.0 compliant. */
#define VIRTIO_F_VERSION_1 32

/* Bytes of guest memory tracked by one bit of the dirty log. */
#define VHOST_LOG_PAGE 4096
184 | ||
3595e2eb VK |
/* Bit positions of the vhost-user protocol features known to this file. */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,

    /* One past the highest bit defined above. */
    VHOST_USER_PROTOCOL_F_MAX
};

/* Mask with one bit set for every feature below VHOST_USER_PROTOCOL_F_MAX. */
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
194 | ||
/*
 * Request codes carried in VhostUserMsg.request. The values also index
 * vubr_request_str[], so the numbering must stay dense.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE                  = 0,
    VHOST_USER_GET_FEATURES          = 1,
    VHOST_USER_SET_FEATURES          = 2,
    VHOST_USER_SET_OWNER             = 3,
    VHOST_USER_RESET_OWNER           = 4,
    VHOST_USER_SET_MEM_TABLE         = 5,
    VHOST_USER_SET_LOG_BASE          = 6,
    VHOST_USER_SET_LOG_FD            = 7,
    VHOST_USER_SET_VRING_NUM         = 8,
    VHOST_USER_SET_VRING_ADDR        = 9,
    VHOST_USER_SET_VRING_BASE        = 10,
    VHOST_USER_GET_VRING_BASE        = 11,
    VHOST_USER_SET_VRING_KICK        = 12,
    VHOST_USER_SET_VRING_CALL        = 13,
    VHOST_USER_SET_VRING_ERR         = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM         = 17,
    VHOST_USER_SET_VRING_ENABLE      = 18,
    VHOST_USER_SEND_RARP             = 19,
    /* One past the last request code. */
    VHOST_USER_MAX
} VhostUserRequest;
218 | ||
/* One memory region as described in a VHOST_USER_SET_MEM_TABLE payload. */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;   /* start of the region, guest physical */
    uint64_t memory_size;       /* length of the region in bytes */
    uint64_t userspace_addr;    /* start of the region, QEMU virtual */
    uint64_t mmap_offset;       /* offset of the region inside its fd */
} VhostUserMemoryRegion;
225 | ||
226 | typedef struct VhostUserMemory { | |
227 | uint32_t nregions; | |
228 | uint32_t padding; | |
229 | VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; | |
230 | } VhostUserMemory; | |
231 | ||
5c93c473 VK |
/* Payload of VHOST_USER_SET_LOG_BASE: which part of the log fd to map. */
typedef struct VhostUserLog {
    uint64_t mmap_size;     /* length handed to mmap() */
    uint64_t mmap_offset;   /* file offset handed to mmap() */
} VhostUserLog;
236 | ||
3595e2eb VK |
237 | typedef struct VhostUserMsg { |
238 | VhostUserRequest request; | |
239 | ||
240 | #define VHOST_USER_VERSION_MASK (0x3) | |
241 | #define VHOST_USER_REPLY_MASK (0x1<<2) | |
242 | uint32_t flags; | |
243 | uint32_t size; /* the following payload size */ | |
244 | union { | |
245 | #define VHOST_USER_VRING_IDX_MASK (0xff) | |
246 | #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) | |
247 | uint64_t u64; | |
248 | struct vhost_vring_state state; | |
249 | struct vhost_vring_addr addr; | |
250 | VhostUserMemory memory; | |
5c93c473 | 251 | VhostUserLog log; |
3595e2eb VK |
252 | } payload; |
253 | int fds[VHOST_MEMORY_MAX_NREGIONS]; | |
254 | int fd_num; | |
255 | } QEMU_PACKED VhostUserMsg; | |
256 | ||
/* Bytes that precede the payload: request, flags and size. */
#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)

/* The version of the protocol we support */
#define VHOST_USER_VERSION    (0x1)

/* Number of entries in VubrDev.vq[]. */
#define MAX_NR_VIRTQUEUE      (8)
263 | ||
/* The bridge's view of one guest memory region after it has been mapped. */
typedef struct VubrDevRegion {
    uint64_t gpa;           /* guest physical address of the region */
    uint64_t size;          /* region length in bytes */
    uint64_t qva;           /* QEMU (userspace) virtual address */
    uint64_t mmap_offset;   /* where the region starts inside our mapping */
    uint64_t mmap_addr;     /* address at which the mapping begins */
} VubrDevRegion;
276 | ||
277 | typedef struct VubrDev { | |
278 | int sock; | |
279 | Dispatcher dispatcher; | |
280 | uint32_t nregions; | |
281 | VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS]; | |
282 | VubrVirtq vq[MAX_NR_VIRTQUEUE]; | |
5c93c473 VK |
283 | int log_call_fd; |
284 | uint64_t log_size; | |
285 | uint8_t *log_table; | |
3595e2eb VK |
286 | int backend_udp_sock; |
287 | struct sockaddr_in backend_udp_dest; | |
5c93c473 VK |
288 | int ready; |
289 | uint64_t features; | |
a28c393c | 290 | int hdrlen; |
3595e2eb VK |
291 | } VubrDev; |
292 | ||
293 | static const char *vubr_request_str[] = { | |
294 | [VHOST_USER_NONE] = "VHOST_USER_NONE", | |
295 | [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", | |
296 | [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", | |
297 | [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", | |
60915dc4 | 298 | [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", |
3595e2eb VK |
299 | [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", |
300 | [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE", | |
301 | [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD", | |
302 | [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", | |
303 | [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", | |
304 | [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", | |
305 | [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", | |
306 | [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", | |
307 | [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", | |
308 | [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR", | |
309 | [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES", | |
310 | [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", | |
311 | [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", | |
312 | [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", | |
313 | [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP", | |
314 | [VHOST_USER_MAX] = "VHOST_USER_MAX", | |
315 | }; | |
316 | ||
/*
 * Hex-dump @len bytes of @buf to stdout, 16 bytes per row with extra
 * spacing before every group of 4 bytes, followed by a dotted separator.
 */
static void
print_buffer(uint8_t *buf, size_t len)
{
    /* Same type as @len: an int index is compared as unsigned and would
     * overflow for buffers larger than INT_MAX. */
    size_t i;

    printf("Raw buffer:\n");
    for (i = 0; i < len; i++) {
        if (i % 16 == 0) {
            printf("\n");
        }
        if (i % 4 == 0) {
            printf(" ");
        }
        printf("%02x ", buf[i]);
    }
    printf("\n............................................................\n");
}
333 | ||
334 | /* Translate guest physical address to our virtual address. */ | |
335 | static uint64_t | |
336 | gpa_to_va(VubrDev *dev, uint64_t guest_addr) | |
337 | { | |
338 | int i; | |
339 | ||
340 | /* Find matching memory region. */ | |
341 | for (i = 0; i < dev->nregions; i++) { | |
342 | VubrDevRegion *r = &dev->regions[i]; | |
343 | ||
344 | if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) { | |
345 | return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset; | |
346 | } | |
347 | } | |
348 | ||
349 | assert(!"address not found in regions"); | |
350 | return 0; | |
351 | } | |
352 | ||
353 | /* Translate qemu virtual address to our virtual address. */ | |
354 | static uint64_t | |
355 | qva_to_va(VubrDev *dev, uint64_t qemu_addr) | |
356 | { | |
357 | int i; | |
358 | ||
359 | /* Find matching memory region. */ | |
360 | for (i = 0; i < dev->nregions; i++) { | |
361 | VubrDevRegion *r = &dev->regions[i]; | |
362 | ||
363 | if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) { | |
364 | return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset; | |
365 | } | |
366 | } | |
367 | ||
368 | assert(!"address not found in regions"); | |
369 | return 0; | |
370 | } | |
371 | ||
372 | static void | |
373 | vubr_message_read(int conn_fd, VhostUserMsg *vmsg) | |
374 | { | |
375 | char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { }; | |
376 | struct iovec iov = { | |
377 | .iov_base = (char *)vmsg, | |
378 | .iov_len = VHOST_USER_HDR_SIZE, | |
379 | }; | |
380 | struct msghdr msg = { | |
381 | .msg_iov = &iov, | |
382 | .msg_iovlen = 1, | |
383 | .msg_control = control, | |
384 | .msg_controllen = sizeof(control), | |
385 | }; | |
386 | size_t fd_size; | |
387 | struct cmsghdr *cmsg; | |
388 | int rc; | |
389 | ||
390 | rc = recvmsg(conn_fd, &msg, 0); | |
391 | ||
5c93c473 VK |
392 | if (rc == 0) { |
393 | vubr_die("recvmsg"); | |
394 | fprintf(stderr, "Peer disconnected.\n"); | |
395 | exit(1); | |
396 | } | |
397 | if (rc < 0) { | |
3595e2eb VK |
398 | vubr_die("recvmsg"); |
399 | } | |
400 | ||
401 | vmsg->fd_num = 0; | |
402 | for (cmsg = CMSG_FIRSTHDR(&msg); | |
403 | cmsg != NULL; | |
404 | cmsg = CMSG_NXTHDR(&msg, cmsg)) | |
405 | { | |
406 | if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { | |
407 | fd_size = cmsg->cmsg_len - CMSG_LEN(0); | |
408 | vmsg->fd_num = fd_size / sizeof(int); | |
409 | memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size); | |
410 | break; | |
411 | } | |
412 | } | |
413 | ||
414 | if (vmsg->size > sizeof(vmsg->payload)) { | |
415 | fprintf(stderr, | |
416 | "Error: too big message request: %d, size: vmsg->size: %u, " | |
5602b39f | 417 | "while sizeof(vmsg->payload) = %zu\n", |
3595e2eb VK |
418 | vmsg->request, vmsg->size, sizeof(vmsg->payload)); |
419 | exit(1); | |
420 | } | |
421 | ||
422 | if (vmsg->size) { | |
423 | rc = read(conn_fd, &vmsg->payload, vmsg->size); | |
5c93c473 VK |
424 | if (rc == 0) { |
425 | vubr_die("recvmsg"); | |
426 | fprintf(stderr, "Peer disconnected.\n"); | |
427 | exit(1); | |
428 | } | |
429 | if (rc < 0) { | |
3595e2eb VK |
430 | vubr_die("recvmsg"); |
431 | } | |
432 | ||
433 | assert(rc == vmsg->size); | |
434 | } | |
435 | } | |
436 | ||
437 | static void | |
438 | vubr_message_write(int conn_fd, VhostUserMsg *vmsg) | |
439 | { | |
440 | int rc; | |
441 | ||
442 | do { | |
443 | rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size); | |
444 | } while (rc < 0 && errno == EINTR); | |
445 | ||
446 | if (rc < 0) { | |
447 | vubr_die("write"); | |
448 | } | |
449 | } | |
450 | ||
451 | static void | |
452 | vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len) | |
453 | { | |
454 | int slen = sizeof(struct sockaddr_in); | |
455 | ||
456 | if (sendto(dev->backend_udp_sock, buf, len, 0, | |
457 | (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) { | |
458 | vubr_die("sendto()"); | |
459 | } | |
460 | } | |
461 | ||
462 | static int | |
463 | vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen) | |
464 | { | |
465 | int slen = sizeof(struct sockaddr_in); | |
466 | int rc; | |
467 | ||
468 | rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0, | |
469 | (struct sockaddr *) &dev->backend_udp_dest, | |
470 | (socklen_t *)&slen); | |
471 | if (rc == -1) { | |
472 | vubr_die("recvfrom()"); | |
473 | } | |
474 | ||
475 | return rc; | |
476 | } | |
477 | ||
478 | static void | |
479 | vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len) | |
480 | { | |
a28c393c VK |
481 | int hdrlen = dev->hdrlen; |
482 | DPRINT(" hdrlen = %d\n", dev->hdrlen); | |
3595e2eb VK |
483 | |
484 | if (VHOST_USER_BRIDGE_DEBUG) { | |
485 | print_buffer(buf, len); | |
486 | } | |
487 | vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen); | |
488 | } | |
489 | ||
5c93c473 VK |
490 | /* Kick the log_call_fd if required. */ |
491 | static void | |
492 | vubr_log_kick(VubrDev *dev) | |
493 | { | |
494 | if (dev->log_call_fd != -1) { | |
495 | DPRINT("Kicking the QEMU's log...\n"); | |
496 | eventfd_write(dev->log_call_fd, 1); | |
497 | } | |
498 | } | |
499 | ||
3595e2eb VK |
500 | /* Kick the guest if necessary. */ |
501 | static void | |
502 | vubr_virtqueue_kick(VubrVirtq *vq) | |
503 | { | |
504 | if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { | |
505 | DPRINT("Kicking the guest...\n"); | |
506 | eventfd_write(vq->call_fd, 1); | |
507 | } | |
508 | } | |
509 | ||
5c93c473 VK |
/*
 * Mark guest page number @page dirty in @log_table.
 * The table holds one bit per page: bit (page % 8) of byte (page / 8).
 */
static void
vubr_log_page(uint8_t *log_table, uint64_t page)
{
    uint8_t *slot = &log_table[page / 8];

    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
    atomic_or(slot, 1 << (page % 8));
}
516 | ||
517 | static void | |
518 | vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length) | |
519 | { | |
520 | uint64_t page; | |
521 | ||
522 | if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) || | |
523 | !dev->log_table || !length) { | |
524 | return; | |
525 | } | |
526 | ||
527 | assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8)); | |
528 | ||
529 | page = address / VHOST_LOG_PAGE; | |
530 | while (page * VHOST_LOG_PAGE < address + length) { | |
531 | vubr_log_page(dev->log_table, page); | |
532 | page += VHOST_LOG_PAGE; | |
533 | } | |
534 | vubr_log_kick(dev); | |
535 | } | |
536 | ||
3595e2eb VK |
537 | static void |
538 | vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) | |
539 | { | |
5c93c473 | 540 | struct vring_desc *desc = vq->desc; |
3595e2eb | 541 | struct vring_avail *avail = vq->avail; |
5c93c473 VK |
542 | struct vring_used *used = vq->used; |
543 | uint64_t log_guest_addr = vq->log_guest_addr; | |
a28c393c | 544 | int32_t remaining_len = len; |
3595e2eb VK |
545 | |
546 | unsigned int size = vq->size; | |
547 | ||
548 | uint16_t avail_index = atomic_mb_read(&avail->idx); | |
549 | ||
550 | /* We check the available descriptors before posting the | |
551 | * buffer, so here we assume that enough available | |
552 | * descriptors. */ | |
553 | assert(vq->last_avail_index != avail_index); | |
554 | uint16_t a_index = vq->last_avail_index % size; | |
555 | uint16_t u_index = vq->last_used_index % size; | |
556 | uint16_t d_index = avail->ring[a_index]; | |
557 | ||
558 | int i = d_index; | |
a28c393c | 559 | uint32_t written_len = 0; |
3595e2eb | 560 | |
a28c393c VK |
561 | do { |
562 | DPRINT("Post packet to guest on vq:\n"); | |
563 | DPRINT(" size = %d\n", vq->size); | |
564 | DPRINT(" last_avail_index = %d\n", vq->last_avail_index); | |
565 | DPRINT(" last_used_index = %d\n", vq->last_used_index); | |
566 | DPRINT(" a_index = %d\n", a_index); | |
567 | DPRINT(" u_index = %d\n", u_index); | |
568 | DPRINT(" d_index = %d\n", d_index); | |
569 | DPRINT(" desc[%d].addr = 0x%016"PRIx64"\n", i, desc[i].addr); | |
570 | DPRINT(" desc[%d].len = %d\n", i, desc[i].len); | |
571 | DPRINT(" desc[%d].flags = %d\n", i, desc[i].flags); | |
572 | DPRINT(" avail->idx = %d\n", avail_index); | |
573 | DPRINT(" used->idx = %d\n", used->idx); | |
574 | ||
575 | if (!(desc[i].flags & VRING_DESC_F_WRITE)) { | |
576 | /* FIXME: we should find writable descriptor. */ | |
577 | fprintf(stderr, "Error: descriptor is not writable. Exiting.\n"); | |
578 | exit(1); | |
579 | } | |
3595e2eb | 580 | |
5602b39f | 581 | void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr); |
a28c393c VK |
582 | uint32_t chunk_len = desc[i].len; |
583 | uint32_t chunk_write_len = MIN(remaining_len, chunk_len); | |
3595e2eb | 584 | |
a28c393c VK |
585 | memcpy(chunk_start, buf + written_len, chunk_write_len); |
586 | vubr_log_write(dev, desc[i].addr, chunk_write_len); | |
587 | remaining_len -= chunk_write_len; | |
588 | written_len += chunk_write_len; | |
589 | ||
590 | if ((remaining_len == 0) || !(desc[i].flags & VRING_DESC_F_NEXT)) { | |
591 | break; | |
592 | } | |
593 | ||
594 | i = desc[i].next; | |
595 | } while (1); | |
596 | ||
597 | if (remaining_len > 0) { | |
598 | fprintf(stderr, | |
599 | "Too long packet for RX, remaining_len = %d, Dropping...\n", | |
600 | remaining_len); | |
601 | return; | |
3595e2eb VK |
602 | } |
603 | ||
604 | /* Add descriptor to the used ring. */ | |
605 | used->ring[u_index].id = d_index; | |
606 | used->ring[u_index].len = len; | |
5c93c473 VK |
607 | vubr_log_write(dev, |
608 | log_guest_addr + offsetof(struct vring_used, ring[u_index]), | |
609 | sizeof(used->ring[u_index])); | |
3595e2eb VK |
610 | |
611 | vq->last_avail_index++; | |
612 | vq->last_used_index++; | |
613 | ||
614 | atomic_mb_set(&used->idx, vq->last_used_index); | |
5c93c473 VK |
615 | vubr_log_write(dev, |
616 | log_guest_addr + offsetof(struct vring_used, idx), | |
617 | sizeof(used->idx)); | |
3595e2eb VK |
618 | |
619 | /* Kick the guest if necessary. */ | |
620 | vubr_virtqueue_kick(vq); | |
621 | } | |
622 | ||
623 | static int | |
624 | vubr_process_desc(VubrDev *dev, VubrVirtq *vq) | |
625 | { | |
5c93c473 | 626 | struct vring_desc *desc = vq->desc; |
3595e2eb | 627 | struct vring_avail *avail = vq->avail; |
5c93c473 VK |
628 | struct vring_used *used = vq->used; |
629 | uint64_t log_guest_addr = vq->log_guest_addr; | |
3595e2eb VK |
630 | |
631 | unsigned int size = vq->size; | |
632 | ||
633 | uint16_t a_index = vq->last_avail_index % size; | |
634 | uint16_t u_index = vq->last_used_index % size; | |
635 | uint16_t d_index = avail->ring[a_index]; | |
636 | ||
637 | uint32_t i, len = 0; | |
638 | size_t buf_size = 4096; | |
639 | uint8_t buf[4096]; | |
640 | ||
641 | DPRINT("Chunks: "); | |
642 | i = d_index; | |
643 | do { | |
5602b39f | 644 | void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr); |
3595e2eb VK |
645 | uint32_t chunk_len = desc[i].len; |
646 | ||
5c93c473 VK |
647 | assert(!(desc[i].flags & VRING_DESC_F_WRITE)); |
648 | ||
3595e2eb VK |
649 | if (len + chunk_len < buf_size) { |
650 | memcpy(buf + len, chunk_start, chunk_len); | |
651 | DPRINT("%d ", chunk_len); | |
652 | } else { | |
653 | fprintf(stderr, "Error: too long packet. Dropping...\n"); | |
654 | break; | |
655 | } | |
656 | ||
657 | len += chunk_len; | |
658 | ||
659 | if (!(desc[i].flags & VRING_DESC_F_NEXT)) { | |
660 | break; | |
661 | } | |
662 | ||
663 | i = desc[i].next; | |
664 | } while (1); | |
665 | DPRINT("\n"); | |
666 | ||
667 | if (!len) { | |
668 | return -1; | |
669 | } | |
670 | ||
671 | /* Add descriptor to the used ring. */ | |
672 | used->ring[u_index].id = d_index; | |
673 | used->ring[u_index].len = len; | |
5c93c473 VK |
674 | vubr_log_write(dev, |
675 | log_guest_addr + offsetof(struct vring_used, ring[u_index]), | |
676 | sizeof(used->ring[u_index])); | |
3595e2eb VK |
677 | |
678 | vubr_consume_raw_packet(dev, buf, len); | |
679 | ||
680 | return 0; | |
681 | } | |
682 | ||
683 | static void | |
684 | vubr_process_avail(VubrDev *dev, VubrVirtq *vq) | |
685 | { | |
686 | struct vring_avail *avail = vq->avail; | |
687 | struct vring_used *used = vq->used; | |
5c93c473 | 688 | uint64_t log_guest_addr = vq->log_guest_addr; |
3595e2eb VK |
689 | |
690 | while (vq->last_avail_index != atomic_mb_read(&avail->idx)) { | |
691 | vubr_process_desc(dev, vq); | |
692 | vq->last_avail_index++; | |
693 | vq->last_used_index++; | |
694 | } | |
695 | ||
696 | atomic_mb_set(&used->idx, vq->last_used_index); | |
5c93c473 VK |
697 | vubr_log_write(dev, |
698 | log_guest_addr + offsetof(struct vring_used, idx), | |
699 | sizeof(used->idx)); | |
3595e2eb VK |
700 | } |
701 | ||
702 | static void | |
703 | vubr_backend_recv_cb(int sock, void *ctx) | |
704 | { | |
705 | VubrDev *dev = (VubrDev *) ctx; | |
706 | VubrVirtq *rx_vq = &dev->vq[0]; | |
707 | uint8_t buf[4096]; | |
708 | struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf; | |
a28c393c | 709 | int hdrlen = dev->hdrlen; |
3595e2eb VK |
710 | int buflen = sizeof(buf); |
711 | int len; | |
712 | ||
5c93c473 VK |
713 | if (!dev->ready) { |
714 | return; | |
715 | } | |
716 | ||
3595e2eb | 717 | DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n"); |
a28c393c | 718 | DPRINT(" hdrlen = %d\n", hdrlen); |
3595e2eb VK |
719 | |
720 | uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx); | |
721 | ||
722 | /* If there is no available descriptors, just do nothing. | |
723 | * The buffer will be handled by next arrived UDP packet, | |
724 | * or next kick on receive virtq. */ | |
725 | if (rx_vq->last_avail_index == avail_index) { | |
726 | DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n"); | |
727 | return; | |
728 | } | |
729 | ||
a28c393c VK |
730 | memset(buf, 0, hdrlen); |
731 | /* TODO: support mergeable buffers. */ | |
732 | if (hdrlen == 12) | |
733 | hdr->num_buffers = 1; | |
3595e2eb VK |
734 | len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen); |
735 | ||
3595e2eb VK |
736 | vubr_post_buffer(dev, rx_vq, buf, len + hdrlen); |
737 | } | |
738 | ||
739 | static void | |
740 | vubr_kick_cb(int sock, void *ctx) | |
741 | { | |
742 | VubrDev *dev = (VubrDev *) ctx; | |
743 | eventfd_t kick_data; | |
744 | ssize_t rc; | |
745 | ||
746 | rc = eventfd_read(sock, &kick_data); | |
747 | if (rc == -1) { | |
748 | vubr_die("eventfd_read()"); | |
749 | } else { | |
750 | DPRINT("Got kick_data: %016"PRIx64"\n", kick_data); | |
751 | vubr_process_avail(dev, &dev->vq[1]); | |
752 | } | |
753 | } | |
754 | ||
755 | static int | |
756 | vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
757 | { | |
758 | DPRINT("Function %s() not implemented yet.\n", __func__); | |
759 | return 0; | |
760 | } | |
761 | ||
762 | static int | |
763 | vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
764 | { | |
765 | vmsg->payload.u64 = | |
766 | ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | | |
5c93c473 | 767 | (1ULL << VHOST_F_LOG_ALL) | |
85ea9da5 | 768 | (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | |
5c93c473 VK |
769 | (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)); |
770 | ||
3595e2eb VK |
771 | vmsg->size = sizeof(vmsg->payload.u64); |
772 | ||
773 | DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
774 | ||
5c93c473 | 775 | /* Reply */ |
3595e2eb VK |
776 | return 1; |
777 | } | |
778 | ||
779 | static int | |
780 | vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
781 | { | |
782 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
a28c393c | 783 | |
5c93c473 | 784 | dev->features = vmsg->payload.u64; |
a28c393c VK |
785 | if ((dev->features & (1ULL << VIRTIO_F_VERSION_1)) || |
786 | (dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) { | |
787 | dev->hdrlen = 12; | |
788 | } else { | |
789 | dev->hdrlen = 10; | |
790 | } | |
791 | ||
3595e2eb VK |
792 | return 0; |
793 | } | |
794 | ||
795 | static int | |
796 | vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
797 | { | |
798 | return 0; | |
799 | } | |
800 | ||
5c93c473 VK |
801 | static void |
802 | vubr_close_log(VubrDev *dev) | |
803 | { | |
804 | if (dev->log_table) { | |
805 | if (munmap(dev->log_table, dev->log_size) != 0) { | |
806 | vubr_die("munmap()"); | |
807 | } | |
808 | ||
809 | dev->log_table = 0; | |
810 | } | |
811 | if (dev->log_call_fd != -1) { | |
812 | close(dev->log_call_fd); | |
813 | dev->log_call_fd = -1; | |
814 | } | |
815 | } | |
816 | ||
3595e2eb VK |
817 | static int |
818 | vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
819 | { | |
5c93c473 VK |
820 | vubr_close_log(dev); |
821 | dev->ready = 0; | |
822 | dev->features = 0; | |
3595e2eb VK |
823 | return 0; |
824 | } | |
825 | ||
826 | static int | |
827 | vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
828 | { | |
829 | int i; | |
830 | VhostUserMemory *memory = &vmsg->payload.memory; | |
831 | dev->nregions = memory->nregions; | |
832 | ||
833 | DPRINT("Nregions: %d\n", memory->nregions); | |
834 | for (i = 0; i < dev->nregions; i++) { | |
835 | void *mmap_addr; | |
836 | VhostUserMemoryRegion *msg_region = &memory->regions[i]; | |
837 | VubrDevRegion *dev_region = &dev->regions[i]; | |
838 | ||
839 | DPRINT("Region %d\n", i); | |
840 | DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n", | |
841 | msg_region->guest_phys_addr); | |
842 | DPRINT(" memory_size: 0x%016"PRIx64"\n", | |
843 | msg_region->memory_size); | |
844 | DPRINT(" userspace_addr 0x%016"PRIx64"\n", | |
845 | msg_region->userspace_addr); | |
846 | DPRINT(" mmap_offset 0x%016"PRIx64"\n", | |
847 | msg_region->mmap_offset); | |
848 | ||
5c93c473 VK |
849 | dev_region->gpa = msg_region->guest_phys_addr; |
850 | dev_region->size = msg_region->memory_size; | |
851 | dev_region->qva = msg_region->userspace_addr; | |
3595e2eb VK |
852 | dev_region->mmap_offset = msg_region->mmap_offset; |
853 | ||
854 | /* We don't use offset argument of mmap() since the | |
855 | * mapped address has to be page aligned, and we use huge | |
856 | * pages. */ | |
857 | mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset, | |
858 | PROT_READ | PROT_WRITE, MAP_SHARED, | |
859 | vmsg->fds[i], 0); | |
860 | ||
861 | if (mmap_addr == MAP_FAILED) { | |
862 | vubr_die("mmap"); | |
863 | } | |
5602b39f | 864 | dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr; |
3595e2eb | 865 | DPRINT(" mmap_addr: 0x%016"PRIx64"\n", dev_region->mmap_addr); |
6d0b908a VK |
866 | |
867 | close(vmsg->fds[i]); | |
3595e2eb VK |
868 | } |
869 | ||
870 | return 0; | |
871 | } | |
872 | ||
873 | static int | |
874 | vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
875 | { | |
5c93c473 VK |
876 | int fd; |
877 | uint64_t log_mmap_size, log_mmap_offset; | |
878 | void *rc; | |
879 | ||
880 | assert(vmsg->fd_num == 1); | |
881 | fd = vmsg->fds[0]; | |
882 | ||
883 | assert(vmsg->size == sizeof(vmsg->payload.log)); | |
884 | log_mmap_offset = vmsg->payload.log.mmap_offset; | |
885 | log_mmap_size = vmsg->payload.log.mmap_size; | |
886 | DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset); | |
887 | DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size); | |
888 | ||
889 | rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, | |
890 | log_mmap_offset); | |
891 | if (rc == MAP_FAILED) { | |
892 | vubr_die("mmap"); | |
893 | } | |
894 | dev->log_table = rc; | |
895 | dev->log_size = log_mmap_size; | |
896 | ||
897 | vmsg->size = sizeof(vmsg->payload.u64); | |
898 | /* Reply */ | |
899 | return 1; | |
3595e2eb VK |
900 | } |
901 | ||
902 | static int | |
903 | vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
904 | { | |
5c93c473 VK |
905 | assert(vmsg->fd_num == 1); |
906 | dev->log_call_fd = vmsg->fds[0]; | |
907 | DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]); | |
3595e2eb VK |
908 | return 0; |
909 | } | |
910 | ||
911 | static int | |
912 | vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
913 | { | |
914 | unsigned int index = vmsg->payload.state.index; | |
915 | unsigned int num = vmsg->payload.state.num; | |
916 | ||
917 | DPRINT("State.index: %d\n", index); | |
918 | DPRINT("State.num: %d\n", num); | |
919 | dev->vq[index].size = num; | |
920 | return 0; | |
921 | } | |
922 | ||
923 | static int | |
924 | vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
925 | { | |
926 | struct vhost_vring_addr *vra = &vmsg->payload.addr; | |
927 | unsigned int index = vra->index; | |
928 | VubrVirtq *vq = &dev->vq[index]; | |
929 | ||
930 | DPRINT("vhost_vring_addr:\n"); | |
931 | DPRINT(" index: %d\n", vra->index); | |
932 | DPRINT(" flags: %d\n", vra->flags); | |
933 | DPRINT(" desc_user_addr: 0x%016llx\n", vra->desc_user_addr); | |
934 | DPRINT(" used_user_addr: 0x%016llx\n", vra->used_user_addr); | |
935 | DPRINT(" avail_user_addr: 0x%016llx\n", vra->avail_user_addr); | |
936 | DPRINT(" log_guest_addr: 0x%016llx\n", vra->log_guest_addr); | |
937 | ||
5602b39f MT |
938 | vq->desc = (struct vring_desc *)(uintptr_t)qva_to_va(dev, vra->desc_user_addr); |
939 | vq->used = (struct vring_used *)(uintptr_t)qva_to_va(dev, vra->used_user_addr); | |
940 | vq->avail = (struct vring_avail *)(uintptr_t)qva_to_va(dev, vra->avail_user_addr); | |
5c93c473 | 941 | vq->log_guest_addr = vra->log_guest_addr; |
3595e2eb VK |
942 | |
943 | DPRINT("Setting virtq addresses:\n"); | |
944 | DPRINT(" vring_desc at %p\n", vq->desc); | |
945 | DPRINT(" vring_used at %p\n", vq->used); | |
946 | DPRINT(" vring_avail at %p\n", vq->avail); | |
947 | ||
948 | vq->last_used_index = vq->used->idx; | |
949 | return 0; | |
950 | } | |
951 | ||
952 | static int | |
953 | vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
954 | { | |
955 | unsigned int index = vmsg->payload.state.index; | |
956 | unsigned int num = vmsg->payload.state.num; | |
957 | ||
958 | DPRINT("State.index: %d\n", index); | |
959 | DPRINT("State.num: %d\n", num); | |
960 | dev->vq[index].last_avail_index = num; | |
961 | ||
962 | return 0; | |
963 | } | |
964 | ||
965 | static int | |
966 | vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
967 | { | |
5c93c473 VK |
968 | unsigned int index = vmsg->payload.state.index; |
969 | ||
970 | DPRINT("State.index: %d\n", index); | |
971 | vmsg->payload.state.num = dev->vq[index].last_avail_index; | |
972 | vmsg->size = sizeof(vmsg->payload.state); | |
973 | /* FIXME: this is a work-around for a bug in QEMU enabling | |
974 | * too early vrings. When protocol features are enabled, | |
975 | * we have to respect * VHOST_USER_SET_VRING_ENABLE request. */ | |
976 | dev->ready = 0; | |
977 | ||
6d0b908a VK |
978 | if (dev->vq[index].call_fd != -1) { |
979 | close(dev->vq[index].call_fd); | |
980 | dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd); | |
981 | dev->vq[index].call_fd = -1; | |
982 | } | |
983 | if (dev->vq[index].kick_fd != -1) { | |
984 | close(dev->vq[index].kick_fd); | |
985 | dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd); | |
986 | dev->vq[index].kick_fd = -1; | |
987 | } | |
988 | ||
5c93c473 VK |
989 | /* Reply */ |
990 | return 1; | |
3595e2eb VK |
991 | } |
992 | ||
993 | static int | |
994 | vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
995 | { | |
996 | uint64_t u64_arg = vmsg->payload.u64; | |
997 | int index = u64_arg & VHOST_USER_VRING_IDX_MASK; | |
998 | ||
999 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
1000 | ||
1001 | assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0); | |
1002 | assert(vmsg->fd_num == 1); | |
1003 | ||
6d0b908a VK |
1004 | if (dev->vq[index].kick_fd != -1) { |
1005 | close(dev->vq[index].kick_fd); | |
1006 | dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd); | |
1007 | } | |
3595e2eb VK |
1008 | dev->vq[index].kick_fd = vmsg->fds[0]; |
1009 | DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index); | |
1010 | ||
1011 | if (index % 2 == 1) { | |
1012 | /* TX queue. */ | |
1013 | dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd, | |
1014 | dev, vubr_kick_cb); | |
1015 | ||
1016 | DPRINT("Waiting for kicks on fd: %d for vq: %d\n", | |
1017 | dev->vq[index].kick_fd, index); | |
1018 | } | |
5c93c473 VK |
1019 | /* We temporarily use this hack to determine that both TX and RX |
1020 | * queues are set up and ready for processing. | |
1021 | * FIXME: we need to rely in VHOST_USER_SET_VRING_ENABLE and | |
1022 | * actual kicks. */ | |
1023 | if (dev->vq[0].kick_fd != -1 && | |
1024 | dev->vq[1].kick_fd != -1) { | |
1025 | dev->ready = 1; | |
1026 | DPRINT("vhost-user-bridge is ready for processing queues.\n"); | |
1027 | } | |
3595e2eb | 1028 | return 0; |
5c93c473 | 1029 | |
3595e2eb VK |
1030 | } |
1031 | ||
1032 | static int | |
1033 | vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1034 | { | |
1035 | uint64_t u64_arg = vmsg->payload.u64; | |
1036 | int index = u64_arg & VHOST_USER_VRING_IDX_MASK; | |
1037 | ||
1038 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
1039 | assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0); | |
1040 | assert(vmsg->fd_num == 1); | |
1041 | ||
6d0b908a VK |
1042 | if (dev->vq[index].call_fd != -1) { |
1043 | close(dev->vq[index].call_fd); | |
1044 | dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd); | |
1045 | } | |
3595e2eb VK |
1046 | dev->vq[index].call_fd = vmsg->fds[0]; |
1047 | DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index); | |
1048 | ||
1049 | return 0; | |
1050 | } | |
1051 | ||
1052 | static int | |
1053 | vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1054 | { | |
1055 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
1056 | return 0; | |
1057 | } | |
1058 | ||
1059 | static int | |
1060 | vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1061 | { | |
5c93c473 | 1062 | vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD; |
3595e2eb | 1063 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); |
5c93c473 VK |
1064 | vmsg->size = sizeof(vmsg->payload.u64); |
1065 | ||
1066 | /* Reply */ | |
1067 | return 1; | |
3595e2eb VK |
1068 | } |
1069 | ||
1070 | static int | |
1071 | vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1072 | { | |
1073 | /* FIXME: unimplented */ | |
1074 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
1075 | return 0; | |
1076 | } | |
1077 | ||
1078 | static int | |
1079 | vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1080 | { | |
1081 | DPRINT("Function %s() not implemented yet.\n", __func__); | |
1082 | return 0; | |
1083 | } | |
1084 | ||
1085 | static int | |
1086 | vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1087 | { | |
5c93c473 VK |
1088 | unsigned int index = vmsg->payload.state.index; |
1089 | unsigned int enable = vmsg->payload.state.num; | |
1090 | ||
1091 | DPRINT("State.index: %d\n", index); | |
1092 | DPRINT("State.enable: %d\n", enable); | |
1093 | dev->vq[index].enable = enable; | |
3595e2eb VK |
1094 | return 0; |
1095 | } | |
1096 | ||
1097 | static int | |
1098 | vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1099 | { | |
1100 | DPRINT("Function %s() not implemented yet.\n", __func__); | |
1101 | return 0; | |
1102 | } | |
1103 | ||
1104 | static int | |
1105 | vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg) | |
1106 | { | |
1107 | /* Print out generic part of the request. */ | |
1108 | DPRINT( | |
1109 | "================== Vhost user message from QEMU ==================\n"); | |
1110 | DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request], | |
1111 | vmsg->request); | |
1112 | DPRINT("Flags: 0x%x\n", vmsg->flags); | |
1113 | DPRINT("Size: %d\n", vmsg->size); | |
1114 | ||
1115 | if (vmsg->fd_num) { | |
1116 | int i; | |
1117 | DPRINT("Fds:"); | |
1118 | for (i = 0; i < vmsg->fd_num; i++) { | |
1119 | DPRINT(" %d", vmsg->fds[i]); | |
1120 | } | |
1121 | DPRINT("\n"); | |
1122 | } | |
1123 | ||
1124 | switch (vmsg->request) { | |
1125 | case VHOST_USER_NONE: | |
1126 | return vubr_none_exec(dev, vmsg); | |
1127 | case VHOST_USER_GET_FEATURES: | |
1128 | return vubr_get_features_exec(dev, vmsg); | |
1129 | case VHOST_USER_SET_FEATURES: | |
1130 | return vubr_set_features_exec(dev, vmsg); | |
1131 | case VHOST_USER_SET_OWNER: | |
1132 | return vubr_set_owner_exec(dev, vmsg); | |
60915dc4 | 1133 | case VHOST_USER_RESET_OWNER: |
3595e2eb VK |
1134 | return vubr_reset_device_exec(dev, vmsg); |
1135 | case VHOST_USER_SET_MEM_TABLE: | |
1136 | return vubr_set_mem_table_exec(dev, vmsg); | |
1137 | case VHOST_USER_SET_LOG_BASE: | |
1138 | return vubr_set_log_base_exec(dev, vmsg); | |
1139 | case VHOST_USER_SET_LOG_FD: | |
1140 | return vubr_set_log_fd_exec(dev, vmsg); | |
1141 | case VHOST_USER_SET_VRING_NUM: | |
1142 | return vubr_set_vring_num_exec(dev, vmsg); | |
1143 | case VHOST_USER_SET_VRING_ADDR: | |
1144 | return vubr_set_vring_addr_exec(dev, vmsg); | |
1145 | case VHOST_USER_SET_VRING_BASE: | |
1146 | return vubr_set_vring_base_exec(dev, vmsg); | |
1147 | case VHOST_USER_GET_VRING_BASE: | |
1148 | return vubr_get_vring_base_exec(dev, vmsg); | |
1149 | case VHOST_USER_SET_VRING_KICK: | |
1150 | return vubr_set_vring_kick_exec(dev, vmsg); | |
1151 | case VHOST_USER_SET_VRING_CALL: | |
1152 | return vubr_set_vring_call_exec(dev, vmsg); | |
1153 | case VHOST_USER_SET_VRING_ERR: | |
1154 | return vubr_set_vring_err_exec(dev, vmsg); | |
1155 | case VHOST_USER_GET_PROTOCOL_FEATURES: | |
1156 | return vubr_get_protocol_features_exec(dev, vmsg); | |
1157 | case VHOST_USER_SET_PROTOCOL_FEATURES: | |
1158 | return vubr_set_protocol_features_exec(dev, vmsg); | |
1159 | case VHOST_USER_GET_QUEUE_NUM: | |
1160 | return vubr_get_queue_num_exec(dev, vmsg); | |
1161 | case VHOST_USER_SET_VRING_ENABLE: | |
1162 | return vubr_set_vring_enable_exec(dev, vmsg); | |
1163 | case VHOST_USER_SEND_RARP: | |
1164 | return vubr_send_rarp_exec(dev, vmsg); | |
1165 | ||
1166 | case VHOST_USER_MAX: | |
1167 | assert(vmsg->request != VHOST_USER_MAX); | |
1168 | } | |
1169 | return 0; | |
1170 | } | |
1171 | ||
1172 | static void | |
1173 | vubr_receive_cb(int sock, void *ctx) | |
1174 | { | |
1175 | VubrDev *dev = (VubrDev *) ctx; | |
1176 | VhostUserMsg vmsg; | |
1177 | int reply_requested; | |
1178 | ||
1179 | vubr_message_read(sock, &vmsg); | |
1180 | reply_requested = vubr_execute_request(dev, &vmsg); | |
1181 | if (reply_requested) { | |
1182 | /* Set the version in the flags when sending the reply */ | |
1183 | vmsg.flags &= ~VHOST_USER_VERSION_MASK; | |
1184 | vmsg.flags |= VHOST_USER_VERSION; | |
1185 | vmsg.flags |= VHOST_USER_REPLY_MASK; | |
1186 | vubr_message_write(sock, &vmsg); | |
1187 | } | |
1188 | } | |
1189 | ||
1190 | static void | |
1191 | vubr_accept_cb(int sock, void *ctx) | |
1192 | { | |
1193 | VubrDev *dev = (VubrDev *)ctx; | |
1194 | int conn_fd; | |
1195 | struct sockaddr_un un; | |
1196 | socklen_t len = sizeof(un); | |
1197 | ||
1198 | conn_fd = accept(sock, (struct sockaddr *) &un, &len); | |
5c93c473 | 1199 | if (conn_fd == -1) { |
3595e2eb VK |
1200 | vubr_die("accept()"); |
1201 | } | |
1202 | DPRINT("Got connection from remote peer on sock %d\n", conn_fd); | |
1203 | dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb); | |
1204 | } | |
1205 | ||
1206 | static VubrDev * | |
1207 | vubr_new(const char *path) | |
1208 | { | |
1209 | VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev)); | |
1210 | dev->nregions = 0; | |
1211 | int i; | |
1212 | struct sockaddr_un un; | |
1213 | size_t len; | |
1214 | ||
1215 | for (i = 0; i < MAX_NR_VIRTQUEUE; i++) { | |
1216 | dev->vq[i] = (VubrVirtq) { | |
1217 | .call_fd = -1, .kick_fd = -1, | |
1218 | .size = 0, | |
1219 | .last_avail_index = 0, .last_used_index = 0, | |
1220 | .desc = 0, .avail = 0, .used = 0, | |
5c93c473 | 1221 | .enable = 0, |
3595e2eb VK |
1222 | }; |
1223 | } | |
1224 | ||
5c93c473 VK |
1225 | /* Init log */ |
1226 | dev->log_call_fd = -1; | |
1227 | dev->log_size = 0; | |
1228 | dev->log_table = 0; | |
1229 | dev->ready = 0; | |
1230 | dev->features = 0; | |
1231 | ||
3595e2eb VK |
1232 | /* Get a UNIX socket. */ |
1233 | dev->sock = socket(AF_UNIX, SOCK_STREAM, 0); | |
1234 | if (dev->sock == -1) { | |
1235 | vubr_die("socket"); | |
1236 | } | |
1237 | ||
1238 | un.sun_family = AF_UNIX; | |
1239 | strcpy(un.sun_path, path); | |
1240 | len = sizeof(un.sun_family) + strlen(path); | |
1241 | unlink(path); | |
1242 | ||
1243 | if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) { | |
1244 | vubr_die("bind"); | |
1245 | } | |
1246 | ||
1247 | if (listen(dev->sock, 1) == -1) { | |
1248 | vubr_die("listen"); | |
1249 | } | |
1250 | ||
1251 | dispatcher_init(&dev->dispatcher); | |
1252 | dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, | |
1253 | vubr_accept_cb); | |
1254 | ||
1255 | DPRINT("Waiting for connections on UNIX socket %s ...\n", path); | |
1256 | return dev; | |
1257 | } | |
1258 | ||
7cf32491 VK |
/*
 * Fill saddr->sin_addr from a host string.
 *
 * @saddr: address structure to update; only sin_addr is written.
 * @host:  a numeric IPv4 address if it starts with a digit (parsed with
 *         inet_aton()), otherwise a host name resolved with
 *         gethostbyname().
 *
 * Prints a message to stderr and exits with status 1 if the string cannot
 * be parsed or resolved, or if the resolver does not return an IPv4
 * address.
 */
static void
vubr_set_host(struct sockaddr_in *saddr, const char *host)
{
    /* <ctype.h> functions need an unsigned char value. */
    if (isdigit((unsigned char)host[0])) {
        if (!inet_aton(host, &saddr->sin_addr)) {
            fprintf(stderr, "inet_aton() failed.\n");
            exit(1);
        }
    } else {
        struct hostent *he = gethostbyname(host);

        if (!he) {
            fprintf(stderr, "gethostbyname() failed.\n");
            exit(1);
        }
        /* Only copy an address that really is an IPv4 address. */
        if (he->h_addrtype != AF_INET ||
            he->h_length != (int)sizeof(saddr->sin_addr) ||
            !he->h_addr_list[0]) {
            fprintf(stderr, "gethostbyname() returned no IPv4 address.\n");
            exit(1);
        }
        /* memcpy avoids assuming the resolver buffer is aligned. */
        memcpy(&saddr->sin_addr, he->h_addr_list[0],
               sizeof(saddr->sin_addr));
    }
}
1277 | ||
3595e2eb VK |
1278 | static void |
1279 | vubr_backend_udp_setup(VubrDev *dev, | |
1280 | const char *local_host, | |
7cf32491 VK |
1281 | const char *local_port, |
1282 | const char *remote_host, | |
1283 | const char *remote_port) | |
3595e2eb VK |
1284 | { |
1285 | int sock; | |
7cf32491 VK |
1286 | const char *r; |
1287 | ||
1288 | int lport, rport; | |
1289 | ||
1290 | lport = strtol(local_port, (char **)&r, 0); | |
1291 | if (r == local_port) { | |
1292 | fprintf(stderr, "lport parsing failed.\n"); | |
1293 | exit(1); | |
1294 | } | |
1295 | ||
1296 | rport = strtol(remote_port, (char **)&r, 0); | |
1297 | if (r == remote_port) { | |
1298 | fprintf(stderr, "rport parsing failed.\n"); | |
1299 | exit(1); | |
1300 | } | |
1301 | ||
3595e2eb VK |
1302 | struct sockaddr_in si_local = { |
1303 | .sin_family = AF_INET, | |
7cf32491 | 1304 | .sin_port = htons(lport), |
3595e2eb VK |
1305 | }; |
1306 | ||
7cf32491 | 1307 | vubr_set_host(&si_local, local_host); |
3595e2eb VK |
1308 | |
1309 | /* setup destination for sends */ | |
1310 | dev->backend_udp_dest = (struct sockaddr_in) { | |
1311 | .sin_family = AF_INET, | |
7cf32491 | 1312 | .sin_port = htons(rport), |
3595e2eb | 1313 | }; |
7cf32491 | 1314 | vubr_set_host(&dev->backend_udp_dest, remote_host); |
3595e2eb VK |
1315 | |
1316 | sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); | |
1317 | if (sock == -1) { | |
1318 | vubr_die("socket"); | |
1319 | } | |
1320 | ||
1321 | if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) { | |
1322 | vubr_die("bind"); | |
1323 | } | |
1324 | ||
1325 | dev->backend_udp_sock = sock; | |
1326 | dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb); | |
1327 | DPRINT("Waiting for data from udp backend on %s:%d...\n", | |
7cf32491 | 1328 | local_host, lport); |
3595e2eb VK |
1329 | } |
1330 | ||
1331 | static void | |
1332 | vubr_run(VubrDev *dev) | |
1333 | { | |
1334 | while (1) { | |
1335 | /* timeout 200ms */ | |
1336 | dispatcher_wait(&dev->dispatcher, 200000); | |
1337 | /* Here one can try polling strategy. */ | |
1338 | } | |
1339 | } | |
1340 | ||
7cf32491 VK |
/*
 * Split a "host:port" string at its first ':'.
 *
 * @host: receives a newly allocated copy of the part before the ':'.
 * @port: receives a newly allocated copy of the part after the ':'.
 * @buf:  string to split; it is not modified.
 *
 * Returns 0 on success.  Returns -1, leaving *host and *port untouched,
 * if @buf contains no ':' or memory cannot be allocated.
 */
static int
vubr_parse_host_port(const char **host, const char **port, const char *buf)
{
    const char *colon = strchr(buf, ':');
    size_t host_len, port_len;
    char *host_copy, *port_copy;

    if (!colon) {
        return -1;
    }

    host_len = (size_t)(colon - buf);
    port_len = strlen(colon + 1);

    host_copy = malloc(host_len + 1);
    port_copy = malloc(port_len + 1);
    if (!host_copy || !port_copy) {
        free(host_copy);
        free(port_copy);
        return -1;
    }

    memcpy(host_copy, buf, host_len);
    host_copy[host_len] = '\0';
    memcpy(port_copy, colon + 1, port_len + 1);

    *host = host_copy;
    *port = port_copy;
    return 0;
}
1354 | ||
/*
 * Command-line defaults and the settings they initialise.  main() may
 * replace each setting from the -u, -l and -r options; the DEFAULT_*
 * macros are also printed in the usage text.
 */

/* Path of the UNIX domain socket (-u). */
#define DEFAULT_UD_SOCKET "/tmp/vubr.sock"
static const char *ud_socket_path = DEFAULT_UD_SOCKET;

/* Local host and port of the UDP backend (-l host:port). */
#define DEFAULT_LHOST "127.0.0.1"
#define DEFAULT_LPORT "4444"
static const char *lhost = DEFAULT_LHOST;
static const char *lport = DEFAULT_LPORT;

/* Remote host and port of the UDP backend (-r host:port). */
#define DEFAULT_RHOST "127.0.0.1"
#define DEFAULT_RPORT "5555"
static const char *rhost = DEFAULT_RHOST;
static const char *rport = DEFAULT_RPORT;
1366 | ||
3595e2eb VK |
1367 | int |
1368 | main(int argc, char *argv[]) | |
1369 | { | |
1370 | VubrDev *dev; | |
7cf32491 | 1371 | int opt; |
3595e2eb | 1372 | |
7cf32491 VK |
1373 | while ((opt = getopt(argc, argv, "l:r:u:")) != -1) { |
1374 | ||
1375 | switch (opt) { | |
1376 | case 'l': | |
1377 | if (vubr_parse_host_port(&lhost, &lport, optarg) < 0) { | |
1378 | goto out; | |
1379 | } | |
1380 | break; | |
1381 | case 'r': | |
1382 | if (vubr_parse_host_port(&rhost, &rport, optarg) < 0) { | |
1383 | goto out; | |
1384 | } | |
1385 | break; | |
1386 | case 'u': | |
1387 | ud_socket_path = strdup(optarg); | |
1388 | break; | |
1389 | default: | |
1390 | goto out; | |
1391 | } | |
1392 | } | |
1393 | ||
1394 | DPRINT("ud socket: %s\n", ud_socket_path); | |
1395 | DPRINT("local: %s:%s\n", lhost, lport); | |
1396 | DPRINT("remote: %s:%s\n", rhost, rport); | |
1397 | ||
1398 | dev = vubr_new(ud_socket_path); | |
3595e2eb VK |
1399 | if (!dev) { |
1400 | return 1; | |
1401 | } | |
1402 | ||
7cf32491 | 1403 | vubr_backend_udp_setup(dev, lhost, lport, rhost, rport); |
3595e2eb VK |
1404 | vubr_run(dev); |
1405 | return 0; | |
7cf32491 VK |
1406 | |
1407 | out: | |
1408 | fprintf(stderr, "Usage: %s ", argv[0]); | |
1409 | fprintf(stderr, "[-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n"); | |
1410 | fprintf(stderr, "\t-u path to unix doman socket. default: %s\n", | |
1411 | DEFAULT_UD_SOCKET); | |
1412 | fprintf(stderr, "\t-l local host and port. default: %s:%s\n", | |
1413 | DEFAULT_LHOST, DEFAULT_LPORT); | |
1414 | fprintf(stderr, "\t-r remote host and port. default: %s:%s\n", | |
1415 | DEFAULT_RHOST, DEFAULT_RPORT); | |
1416 | ||
1417 | return 1; | |
3595e2eb | 1418 | } |