]>
Commit | Line | Data |
---|---|---|
3595e2eb VK |
1 | /* |
2 | * Vhost User Bridge | |
3 | * | |
4 | * Copyright (c) 2015 Red Hat, Inc. | |
5 | * | |
6 | * Authors: | |
7 | * Victor Kaplansky <victork@redhat.com> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or | |
10 | * later. See the COPYING file in the top-level directory. | |
11 | */ | |
12 | ||
13 | /* | |
14 | * TODO: | |
15 | * - main should get parameters from the command line. | |
5c93c473 VK |
16 | * - implement all request handlers. Still not implemented: |
17 | * vubr_get_queue_num_exec() | |
18 | * vubr_send_rarp_exec() | |
3595e2eb VK |
19 | * - test for broken requests and virtqueue. |
20 | * - implement features defined by Virtio 1.0 spec. | |
21 | * - support mergeable buffers and indirect descriptors. | |
3595e2eb VK |
22 | * - implement clean shutdown. |
23 | * - implement non-blocking writes to UDP backend. | |
24 | * - implement polling strategy. | |
5c93c473 VK |
25 | * - implement clean starting/stopping of vq processing |
26 | * - implement clean starting/stopping of used and buffers | |
27 | * dirty page logging. | |
3595e2eb VK |
28 | */ |
29 | ||
5c93c473 VK |
30 | #define _FILE_OFFSET_BITS 64 |
31 | ||
681c28a3 | 32 | #include "qemu/osdep.h" |
3595e2eb VK |
33 | #include <sys/socket.h> |
34 | #include <sys/un.h> | |
35 | #include <sys/unistd.h> | |
3595e2eb VK |
36 | #include <sys/eventfd.h> |
37 | #include <arpa/inet.h> | |
7cf32491 | 38 | #include <netdb.h> |
3595e2eb VK |
39 | #include <linux/vhost.h> |
40 | ||
41 | #include "qemu/atomic.h" | |
42 | #include "standard-headers/linux/virtio_net.h" | |
43 | #include "standard-headers/linux/virtio_ring.h" | |
44 | ||
45 | #define VHOST_USER_BRIDGE_DEBUG 1 | |
46 | ||
47 | #define DPRINT(...) \ | |
48 | do { \ | |
49 | if (VHOST_USER_BRIDGE_DEBUG) { \ | |
50 | printf(__VA_ARGS__); \ | |
51 | } \ | |
52 | } while (0) | |
53 | ||
/* Handler type: called with the ready descriptor and the registered context. */
typedef void (*CallbackFunc)(int sock, void *ctx);

/* One dispatcher slot: the handler to run and the pointer handed to it. */
typedef struct Event {
    void *ctx;              /* opaque argument passed to callback */
    CallbackFunc callback;  /* function invoked when the descriptor is ready */
} Event;
60 | ||
61 | typedef struct Dispatcher { | |
62 | int max_sock; | |
63 | fd_set fdset; | |
64 | Event events[FD_SETSIZE]; | |
65 | } Dispatcher; | |
66 | ||
/*
 * Print @s followed by the description of the current errno value,
 * then terminate the process with exit status 1.
 */
static void
vubr_die(const char *s)
{
    perror(s);
    exit(1);
}
73 | ||
74 | static int | |
75 | dispatcher_init(Dispatcher *dispr) | |
76 | { | |
77 | FD_ZERO(&dispr->fdset); | |
78 | dispr->max_sock = -1; | |
79 | return 0; | |
80 | } | |
81 | ||
82 | static int | |
83 | dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb) | |
84 | { | |
85 | if (sock >= FD_SETSIZE) { | |
86 | fprintf(stderr, | |
87 | "Error: Failed to add new event. sock %d should be less than %d\n", | |
88 | sock, FD_SETSIZE); | |
89 | return -1; | |
90 | } | |
91 | ||
92 | dispr->events[sock].ctx = ctx; | |
93 | dispr->events[sock].callback = cb; | |
94 | ||
95 | FD_SET(sock, &dispr->fdset); | |
96 | if (sock > dispr->max_sock) { | |
97 | dispr->max_sock = sock; | |
98 | } | |
99 | DPRINT("Added sock %d for watching. max_sock: %d\n", | |
100 | sock, dispr->max_sock); | |
101 | return 0; | |
102 | } | |
103 | ||
3595e2eb VK |
104 | static int |
105 | dispatcher_remove(Dispatcher *dispr, int sock) | |
106 | { | |
107 | if (sock >= FD_SETSIZE) { | |
108 | fprintf(stderr, | |
109 | "Error: Failed to remove event. sock %d should be less than %d\n", | |
110 | sock, FD_SETSIZE); | |
111 | return -1; | |
112 | } | |
113 | ||
114 | FD_CLR(sock, &dispr->fdset); | |
6d0b908a | 115 | DPRINT("Sock %d removed from dispatcher watch.\n", sock); |
3595e2eb VK |
116 | return 0; |
117 | } | |
3595e2eb VK |
118 | |
119 | /* timeout in us */ | |
120 | static int | |
121 | dispatcher_wait(Dispatcher *dispr, uint32_t timeout) | |
122 | { | |
123 | struct timeval tv; | |
124 | tv.tv_sec = timeout / 1000000; | |
125 | tv.tv_usec = timeout % 1000000; | |
126 | ||
127 | fd_set fdset = dispr->fdset; | |
128 | ||
129 | /* wait until some of sockets become readable. */ | |
130 | int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv); | |
131 | ||
132 | if (rc == -1) { | |
133 | vubr_die("select"); | |
134 | } | |
135 | ||
136 | /* Timeout */ | |
137 | if (rc == 0) { | |
138 | return 0; | |
139 | } | |
140 | ||
141 | /* Now call callback for every ready socket. */ | |
142 | ||
143 | int sock; | |
6d0b908a VK |
144 | for (sock = 0; sock < dispr->max_sock + 1; sock++) { |
145 | /* The callback on a socket can remove other sockets from the | |
146 | * dispatcher, thus we have to check that the socket is | |
147 | * still not removed from dispatcher's list | |
148 | */ | |
149 | if (FD_ISSET(sock, &fdset) && FD_ISSET(sock, &dispr->fdset)) { | |
3595e2eb VK |
150 | Event *e = &dispr->events[sock]; |
151 | e->callback(sock, e->ctx); | |
152 | } | |
6d0b908a | 153 | } |
3595e2eb VK |
154 | |
155 | return 0; | |
156 | } | |
157 | ||
/* Per-virtqueue state kept by the bridge. */
typedef struct VubrVirtq {
    int call_fd;                /* eventfd written to notify the guest */
    int kick_fd;                /* presumably the eventfd the guest kicks - confirm */
    uint32_t size;              /* number of ring entries, used as index modulus */
    uint16_t last_avail_index;  /* next avail-ring entry to consume */
    uint16_t last_used_index;   /* next used-ring entry to fill */
    struct vring_desc *desc;    /* descriptor table, in our address space */
    struct vring_avail *avail;  /* available ring, in our address space */
    struct vring_used *used;    /* used ring, in our address space */
    uint64_t log_guest_addr;    /* base address used when logging used-ring writes */
    int enable;
} VubrVirtq;
170 | ||
171 | /* Based on qemu/hw/virtio/vhost-user.c */ | |
172 | ||
173 | #define VHOST_MEMORY_MAX_NREGIONS 8 | |
174 | #define VHOST_USER_F_PROTOCOL_FEATURES 30 | |
a28c393c VK |
175 | /* v1.0 compliant. */ |
176 | #define VIRTIO_F_VERSION_1 32 | |
3595e2eb | 177 | |
5c93c473 VK |
178 | #define VHOST_LOG_PAGE 4096 |
179 | ||
3595e2eb VK |
/*
 * Bit numbers of the vhost-user protocol features.
 * VHOST_USER_PROTOCOL_F_MAX is one past the highest defined bit.
 */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ        = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP      = 2,

    VHOST_USER_PROTOCOL_F_MAX
};
187 | ||
188 | #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) | |
189 | ||
/*
 * Request codes carried in VhostUserMsg.request.
 * VHOST_USER_MAX is one past the highest defined request.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE                  = 0,
    VHOST_USER_GET_FEATURES          = 1,
    VHOST_USER_SET_FEATURES          = 2,
    VHOST_USER_SET_OWNER             = 3,
    VHOST_USER_RESET_OWNER           = 4,
    VHOST_USER_SET_MEM_TABLE         = 5,
    VHOST_USER_SET_LOG_BASE          = 6,
    VHOST_USER_SET_LOG_FD            = 7,
    VHOST_USER_SET_VRING_NUM         = 8,
    VHOST_USER_SET_VRING_ADDR        = 9,
    VHOST_USER_SET_VRING_BASE        = 10,
    VHOST_USER_GET_VRING_BASE        = 11,
    VHOST_USER_SET_VRING_KICK        = 12,
    VHOST_USER_SET_VRING_CALL        = 13,
    VHOST_USER_SET_VRING_ERR         = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM         = 17,
    VHOST_USER_SET_VRING_ENABLE      = 18,
    VHOST_USER_SEND_RARP             = 19,
    VHOST_USER_MAX
} VhostUserRequest;
213 | ||
/* One memory region as described in a VHOST_USER_SET_MEM_TABLE payload. */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;   /* guest physical start address */
    uint64_t memory_size;       /* region length */
    uint64_t userspace_addr;    /* start address in the sender's address space */
    uint64_t mmap_offset;       /* offset of the region inside the passed fd */
} VhostUserMemoryRegion;
220 | ||
221 | typedef struct VhostUserMemory { | |
222 | uint32_t nregions; | |
223 | uint32_t padding; | |
224 | VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; | |
225 | } VhostUserMemory; | |
226 | ||
5c93c473 VK |
/* Payload of VHOST_USER_SET_LOG_BASE: where to map the dirty log. */
typedef struct VhostUserLog {
    uint64_t mmap_size;     /* length passed to mmap() for the log */
    uint64_t mmap_offset;   /* offset passed to mmap() for the log */
} VhostUserLog;
231 | ||
3595e2eb VK |
232 | typedef struct VhostUserMsg { |
233 | VhostUserRequest request; | |
234 | ||
235 | #define VHOST_USER_VERSION_MASK (0x3) | |
236 | #define VHOST_USER_REPLY_MASK (0x1<<2) | |
237 | uint32_t flags; | |
238 | uint32_t size; /* the following payload size */ | |
239 | union { | |
240 | #define VHOST_USER_VRING_IDX_MASK (0xff) | |
241 | #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) | |
242 | uint64_t u64; | |
243 | struct vhost_vring_state state; | |
244 | struct vhost_vring_addr addr; | |
245 | VhostUserMemory memory; | |
5c93c473 | 246 | VhostUserLog log; |
3595e2eb VK |
247 | } payload; |
248 | int fds[VHOST_MEMORY_MAX_NREGIONS]; | |
249 | int fd_num; | |
250 | } QEMU_PACKED VhostUserMsg; | |
251 | ||
252 | #define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) | |
253 | ||
254 | /* The version of the protocol we support */ | |
255 | #define VHOST_USER_VERSION (0x1) | |
256 | ||
257 | #define MAX_NR_VIRTQUEUE (8) | |
258 | ||
/* A guest memory region together with where we mapped it. */
typedef struct VubrDevRegion {
    uint64_t gpa;           /* guest physical start address */
    uint64_t size;          /* region length */
    uint64_t qva;           /* start address in QEMU's (userspace) address space */
    uint64_t mmap_offset;   /* offset of the region inside our mapping */
    uint64_t mmap_addr;     /* start address of our mapping */
} VubrDevRegion;
271 | ||
272 | typedef struct VubrDev { | |
273 | int sock; | |
274 | Dispatcher dispatcher; | |
275 | uint32_t nregions; | |
276 | VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS]; | |
277 | VubrVirtq vq[MAX_NR_VIRTQUEUE]; | |
5c93c473 VK |
278 | int log_call_fd; |
279 | uint64_t log_size; | |
280 | uint8_t *log_table; | |
3595e2eb VK |
281 | int backend_udp_sock; |
282 | struct sockaddr_in backend_udp_dest; | |
5c93c473 VK |
283 | int ready; |
284 | uint64_t features; | |
a28c393c | 285 | int hdrlen; |
3595e2eb VK |
286 | } VubrDev; |
287 | ||
288 | static const char *vubr_request_str[] = { | |
289 | [VHOST_USER_NONE] = "VHOST_USER_NONE", | |
290 | [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", | |
291 | [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", | |
292 | [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", | |
60915dc4 | 293 | [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", |
3595e2eb VK |
294 | [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", |
295 | [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE", | |
296 | [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD", | |
297 | [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", | |
298 | [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", | |
299 | [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", | |
300 | [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", | |
301 | [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", | |
302 | [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", | |
303 | [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR", | |
304 | [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES", | |
305 | [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", | |
306 | [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", | |
307 | [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", | |
308 | [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP", | |
309 | [VHOST_USER_MAX] = "VHOST_USER_MAX", | |
310 | }; | |
311 | ||
/*
 * Hex-dump @len bytes starting at @buf to stdout.
 *
 * A line break is emitted before every 16th byte and an extra space
 * before every 4th byte; the dump ends with a dotted separator line.
 */
static void
print_buffer(uint8_t *buf, size_t len)
{
    /* Index has the same type as @len to avoid a signed/unsigned compare. */
    size_t i;

    printf("Raw buffer:\n");
    for (i = 0; i < len; i++) {
        if (i % 16 == 0) {
            printf("\n");
        }
        if (i % 4 == 0) {
            printf(" ");
        }
        printf("%02x ", buf[i]);
    }
    printf("\n............................................................\n");
}
328 | ||
329 | /* Translate guest physical address to our virtual address. */ | |
330 | static uint64_t | |
331 | gpa_to_va(VubrDev *dev, uint64_t guest_addr) | |
332 | { | |
333 | int i; | |
334 | ||
335 | /* Find matching memory region. */ | |
336 | for (i = 0; i < dev->nregions; i++) { | |
337 | VubrDevRegion *r = &dev->regions[i]; | |
338 | ||
339 | if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) { | |
340 | return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset; | |
341 | } | |
342 | } | |
343 | ||
344 | assert(!"address not found in regions"); | |
345 | return 0; | |
346 | } | |
347 | ||
348 | /* Translate qemu virtual address to our virtual address. */ | |
349 | static uint64_t | |
350 | qva_to_va(VubrDev *dev, uint64_t qemu_addr) | |
351 | { | |
352 | int i; | |
353 | ||
354 | /* Find matching memory region. */ | |
355 | for (i = 0; i < dev->nregions; i++) { | |
356 | VubrDevRegion *r = &dev->regions[i]; | |
357 | ||
358 | if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) { | |
359 | return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset; | |
360 | } | |
361 | } | |
362 | ||
363 | assert(!"address not found in regions"); | |
364 | return 0; | |
365 | } | |
366 | ||
367 | static void | |
368 | vubr_message_read(int conn_fd, VhostUserMsg *vmsg) | |
369 | { | |
370 | char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { }; | |
371 | struct iovec iov = { | |
372 | .iov_base = (char *)vmsg, | |
373 | .iov_len = VHOST_USER_HDR_SIZE, | |
374 | }; | |
375 | struct msghdr msg = { | |
376 | .msg_iov = &iov, | |
377 | .msg_iovlen = 1, | |
378 | .msg_control = control, | |
379 | .msg_controllen = sizeof(control), | |
380 | }; | |
381 | size_t fd_size; | |
382 | struct cmsghdr *cmsg; | |
383 | int rc; | |
384 | ||
385 | rc = recvmsg(conn_fd, &msg, 0); | |
386 | ||
5c93c473 | 387 | if (rc == 0) { |
5c93c473 VK |
388 | fprintf(stderr, "Peer disconnected.\n"); |
389 | exit(1); | |
390 | } | |
391 | if (rc < 0) { | |
3595e2eb VK |
392 | vubr_die("recvmsg"); |
393 | } | |
394 | ||
395 | vmsg->fd_num = 0; | |
396 | for (cmsg = CMSG_FIRSTHDR(&msg); | |
397 | cmsg != NULL; | |
398 | cmsg = CMSG_NXTHDR(&msg, cmsg)) | |
399 | { | |
400 | if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { | |
401 | fd_size = cmsg->cmsg_len - CMSG_LEN(0); | |
402 | vmsg->fd_num = fd_size / sizeof(int); | |
403 | memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size); | |
404 | break; | |
405 | } | |
406 | } | |
407 | ||
408 | if (vmsg->size > sizeof(vmsg->payload)) { | |
409 | fprintf(stderr, | |
410 | "Error: too big message request: %d, size: vmsg->size: %u, " | |
5602b39f | 411 | "while sizeof(vmsg->payload) = %zu\n", |
3595e2eb VK |
412 | vmsg->request, vmsg->size, sizeof(vmsg->payload)); |
413 | exit(1); | |
414 | } | |
415 | ||
416 | if (vmsg->size) { | |
417 | rc = read(conn_fd, &vmsg->payload, vmsg->size); | |
5c93c473 VK |
418 | if (rc == 0) { |
419 | vubr_die("recvmsg"); | |
420 | fprintf(stderr, "Peer disconnected.\n"); | |
421 | exit(1); | |
422 | } | |
423 | if (rc < 0) { | |
3595e2eb VK |
424 | vubr_die("recvmsg"); |
425 | } | |
426 | ||
427 | assert(rc == vmsg->size); | |
428 | } | |
429 | } | |
430 | ||
431 | static void | |
432 | vubr_message_write(int conn_fd, VhostUserMsg *vmsg) | |
433 | { | |
434 | int rc; | |
435 | ||
436 | do { | |
437 | rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size); | |
438 | } while (rc < 0 && errno == EINTR); | |
439 | ||
440 | if (rc < 0) { | |
441 | vubr_die("write"); | |
442 | } | |
443 | } | |
444 | ||
445 | static void | |
446 | vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len) | |
447 | { | |
448 | int slen = sizeof(struct sockaddr_in); | |
449 | ||
450 | if (sendto(dev->backend_udp_sock, buf, len, 0, | |
451 | (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) { | |
452 | vubr_die("sendto()"); | |
453 | } | |
454 | } | |
455 | ||
456 | static int | |
457 | vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen) | |
458 | { | |
459 | int slen = sizeof(struct sockaddr_in); | |
460 | int rc; | |
461 | ||
462 | rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0, | |
463 | (struct sockaddr *) &dev->backend_udp_dest, | |
464 | (socklen_t *)&slen); | |
465 | if (rc == -1) { | |
466 | vubr_die("recvfrom()"); | |
467 | } | |
468 | ||
469 | return rc; | |
470 | } | |
471 | ||
472 | static void | |
473 | vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len) | |
474 | { | |
a28c393c VK |
475 | int hdrlen = dev->hdrlen; |
476 | DPRINT(" hdrlen = %d\n", dev->hdrlen); | |
3595e2eb VK |
477 | |
478 | if (VHOST_USER_BRIDGE_DEBUG) { | |
479 | print_buffer(buf, len); | |
480 | } | |
481 | vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen); | |
482 | } | |
483 | ||
5c93c473 VK |
484 | /* Kick the log_call_fd if required. */ |
485 | static void | |
486 | vubr_log_kick(VubrDev *dev) | |
487 | { | |
488 | if (dev->log_call_fd != -1) { | |
489 | DPRINT("Kicking the QEMU's log...\n"); | |
490 | eventfd_write(dev->log_call_fd, 1); | |
491 | } | |
492 | } | |
493 | ||
3595e2eb VK |
494 | /* Kick the guest if necessary. */ |
495 | static void | |
496 | vubr_virtqueue_kick(VubrVirtq *vq) | |
497 | { | |
498 | if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { | |
499 | DPRINT("Kicking the guest...\n"); | |
500 | eventfd_write(vq->call_fd, 1); | |
501 | } | |
502 | } | |
503 | ||
5c93c473 VK |
/*
 * Mark page number @page dirty in the bitmap @log_table: one bit per
 * page, eight pages per byte, set with an atomic OR.
 */
static void
vubr_log_page(uint8_t *log_table, uint64_t page)
{
    /* @page is unsigned, so print it with the unsigned conversion. */
    DPRINT("Logged dirty guest page: %"PRIu64"\n", page);
    atomic_or(&log_table[page / 8], 1 << (page % 8));
}
510 | ||
511 | static void | |
512 | vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length) | |
513 | { | |
514 | uint64_t page; | |
515 | ||
516 | if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) || | |
517 | !dev->log_table || !length) { | |
518 | return; | |
519 | } | |
520 | ||
521 | assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8)); | |
522 | ||
523 | page = address / VHOST_LOG_PAGE; | |
524 | while (page * VHOST_LOG_PAGE < address + length) { | |
525 | vubr_log_page(dev->log_table, page); | |
526 | page += VHOST_LOG_PAGE; | |
527 | } | |
528 | vubr_log_kick(dev); | |
529 | } | |
530 | ||
3595e2eb VK |
531 | static void |
532 | vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) | |
533 | { | |
5c93c473 | 534 | struct vring_desc *desc = vq->desc; |
3595e2eb | 535 | struct vring_avail *avail = vq->avail; |
5c93c473 VK |
536 | struct vring_used *used = vq->used; |
537 | uint64_t log_guest_addr = vq->log_guest_addr; | |
a28c393c | 538 | int32_t remaining_len = len; |
3595e2eb VK |
539 | |
540 | unsigned int size = vq->size; | |
541 | ||
542 | uint16_t avail_index = atomic_mb_read(&avail->idx); | |
543 | ||
544 | /* We check the available descriptors before posting the | |
545 | * buffer, so here we assume that enough available | |
546 | * descriptors. */ | |
547 | assert(vq->last_avail_index != avail_index); | |
548 | uint16_t a_index = vq->last_avail_index % size; | |
549 | uint16_t u_index = vq->last_used_index % size; | |
550 | uint16_t d_index = avail->ring[a_index]; | |
551 | ||
552 | int i = d_index; | |
a28c393c | 553 | uint32_t written_len = 0; |
3595e2eb | 554 | |
a28c393c VK |
555 | do { |
556 | DPRINT("Post packet to guest on vq:\n"); | |
557 | DPRINT(" size = %d\n", vq->size); | |
558 | DPRINT(" last_avail_index = %d\n", vq->last_avail_index); | |
559 | DPRINT(" last_used_index = %d\n", vq->last_used_index); | |
560 | DPRINT(" a_index = %d\n", a_index); | |
561 | DPRINT(" u_index = %d\n", u_index); | |
562 | DPRINT(" d_index = %d\n", d_index); | |
563 | DPRINT(" desc[%d].addr = 0x%016"PRIx64"\n", i, desc[i].addr); | |
564 | DPRINT(" desc[%d].len = %d\n", i, desc[i].len); | |
565 | DPRINT(" desc[%d].flags = %d\n", i, desc[i].flags); | |
566 | DPRINT(" avail->idx = %d\n", avail_index); | |
567 | DPRINT(" used->idx = %d\n", used->idx); | |
568 | ||
569 | if (!(desc[i].flags & VRING_DESC_F_WRITE)) { | |
570 | /* FIXME: we should find writable descriptor. */ | |
571 | fprintf(stderr, "Error: descriptor is not writable. Exiting.\n"); | |
572 | exit(1); | |
573 | } | |
3595e2eb | 574 | |
5602b39f | 575 | void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr); |
a28c393c VK |
576 | uint32_t chunk_len = desc[i].len; |
577 | uint32_t chunk_write_len = MIN(remaining_len, chunk_len); | |
3595e2eb | 578 | |
a28c393c VK |
579 | memcpy(chunk_start, buf + written_len, chunk_write_len); |
580 | vubr_log_write(dev, desc[i].addr, chunk_write_len); | |
581 | remaining_len -= chunk_write_len; | |
582 | written_len += chunk_write_len; | |
583 | ||
584 | if ((remaining_len == 0) || !(desc[i].flags & VRING_DESC_F_NEXT)) { | |
585 | break; | |
586 | } | |
587 | ||
588 | i = desc[i].next; | |
589 | } while (1); | |
590 | ||
591 | if (remaining_len > 0) { | |
592 | fprintf(stderr, | |
593 | "Too long packet for RX, remaining_len = %d, Dropping...\n", | |
594 | remaining_len); | |
595 | return; | |
3595e2eb VK |
596 | } |
597 | ||
598 | /* Add descriptor to the used ring. */ | |
599 | used->ring[u_index].id = d_index; | |
600 | used->ring[u_index].len = len; | |
5c93c473 VK |
601 | vubr_log_write(dev, |
602 | log_guest_addr + offsetof(struct vring_used, ring[u_index]), | |
603 | sizeof(used->ring[u_index])); | |
3595e2eb VK |
604 | |
605 | vq->last_avail_index++; | |
606 | vq->last_used_index++; | |
607 | ||
608 | atomic_mb_set(&used->idx, vq->last_used_index); | |
5c93c473 VK |
609 | vubr_log_write(dev, |
610 | log_guest_addr + offsetof(struct vring_used, idx), | |
611 | sizeof(used->idx)); | |
3595e2eb VK |
612 | |
613 | /* Kick the guest if necessary. */ | |
614 | vubr_virtqueue_kick(vq); | |
615 | } | |
616 | ||
617 | static int | |
618 | vubr_process_desc(VubrDev *dev, VubrVirtq *vq) | |
619 | { | |
5c93c473 | 620 | struct vring_desc *desc = vq->desc; |
3595e2eb | 621 | struct vring_avail *avail = vq->avail; |
5c93c473 VK |
622 | struct vring_used *used = vq->used; |
623 | uint64_t log_guest_addr = vq->log_guest_addr; | |
3595e2eb VK |
624 | |
625 | unsigned int size = vq->size; | |
626 | ||
627 | uint16_t a_index = vq->last_avail_index % size; | |
628 | uint16_t u_index = vq->last_used_index % size; | |
629 | uint16_t d_index = avail->ring[a_index]; | |
630 | ||
631 | uint32_t i, len = 0; | |
632 | size_t buf_size = 4096; | |
633 | uint8_t buf[4096]; | |
634 | ||
635 | DPRINT("Chunks: "); | |
636 | i = d_index; | |
637 | do { | |
5602b39f | 638 | void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr); |
3595e2eb VK |
639 | uint32_t chunk_len = desc[i].len; |
640 | ||
5c93c473 VK |
641 | assert(!(desc[i].flags & VRING_DESC_F_WRITE)); |
642 | ||
3595e2eb VK |
643 | if (len + chunk_len < buf_size) { |
644 | memcpy(buf + len, chunk_start, chunk_len); | |
645 | DPRINT("%d ", chunk_len); | |
646 | } else { | |
647 | fprintf(stderr, "Error: too long packet. Dropping...\n"); | |
648 | break; | |
649 | } | |
650 | ||
651 | len += chunk_len; | |
652 | ||
653 | if (!(desc[i].flags & VRING_DESC_F_NEXT)) { | |
654 | break; | |
655 | } | |
656 | ||
657 | i = desc[i].next; | |
658 | } while (1); | |
659 | DPRINT("\n"); | |
660 | ||
661 | if (!len) { | |
662 | return -1; | |
663 | } | |
664 | ||
665 | /* Add descriptor to the used ring. */ | |
666 | used->ring[u_index].id = d_index; | |
667 | used->ring[u_index].len = len; | |
5c93c473 VK |
668 | vubr_log_write(dev, |
669 | log_guest_addr + offsetof(struct vring_used, ring[u_index]), | |
670 | sizeof(used->ring[u_index])); | |
3595e2eb VK |
671 | |
672 | vubr_consume_raw_packet(dev, buf, len); | |
673 | ||
674 | return 0; | |
675 | } | |
676 | ||
677 | static void | |
678 | vubr_process_avail(VubrDev *dev, VubrVirtq *vq) | |
679 | { | |
680 | struct vring_avail *avail = vq->avail; | |
681 | struct vring_used *used = vq->used; | |
5c93c473 | 682 | uint64_t log_guest_addr = vq->log_guest_addr; |
3595e2eb VK |
683 | |
684 | while (vq->last_avail_index != atomic_mb_read(&avail->idx)) { | |
685 | vubr_process_desc(dev, vq); | |
686 | vq->last_avail_index++; | |
687 | vq->last_used_index++; | |
688 | } | |
689 | ||
690 | atomic_mb_set(&used->idx, vq->last_used_index); | |
5c93c473 VK |
691 | vubr_log_write(dev, |
692 | log_guest_addr + offsetof(struct vring_used, idx), | |
693 | sizeof(used->idx)); | |
3595e2eb VK |
694 | } |
695 | ||
696 | static void | |
697 | vubr_backend_recv_cb(int sock, void *ctx) | |
698 | { | |
699 | VubrDev *dev = (VubrDev *) ctx; | |
700 | VubrVirtq *rx_vq = &dev->vq[0]; | |
701 | uint8_t buf[4096]; | |
702 | struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf; | |
a28c393c | 703 | int hdrlen = dev->hdrlen; |
3595e2eb VK |
704 | int buflen = sizeof(buf); |
705 | int len; | |
706 | ||
5c93c473 VK |
707 | if (!dev->ready) { |
708 | return; | |
709 | } | |
710 | ||
3595e2eb | 711 | DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n"); |
a28c393c | 712 | DPRINT(" hdrlen = %d\n", hdrlen); |
3595e2eb VK |
713 | |
714 | uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx); | |
715 | ||
716 | /* If there is no available descriptors, just do nothing. | |
717 | * The buffer will be handled by next arrived UDP packet, | |
718 | * or next kick on receive virtq. */ | |
719 | if (rx_vq->last_avail_index == avail_index) { | |
720 | DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n"); | |
721 | return; | |
722 | } | |
723 | ||
a28c393c VK |
724 | memset(buf, 0, hdrlen); |
725 | /* TODO: support mergeable buffers. */ | |
726 | if (hdrlen == 12) | |
727 | hdr->num_buffers = 1; | |
3595e2eb VK |
728 | len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen); |
729 | ||
3595e2eb VK |
730 | vubr_post_buffer(dev, rx_vq, buf, len + hdrlen); |
731 | } | |
732 | ||
733 | static void | |
734 | vubr_kick_cb(int sock, void *ctx) | |
735 | { | |
736 | VubrDev *dev = (VubrDev *) ctx; | |
737 | eventfd_t kick_data; | |
738 | ssize_t rc; | |
739 | ||
740 | rc = eventfd_read(sock, &kick_data); | |
741 | if (rc == -1) { | |
742 | vubr_die("eventfd_read()"); | |
743 | } else { | |
744 | DPRINT("Got kick_data: %016"PRIx64"\n", kick_data); | |
745 | vubr_process_avail(dev, &dev->vq[1]); | |
746 | } | |
747 | } | |
748 | ||
749 | static int | |
750 | vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
751 | { | |
752 | DPRINT("Function %s() not implemented yet.\n", __func__); | |
753 | return 0; | |
754 | } | |
755 | ||
756 | static int | |
757 | vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
758 | { | |
759 | vmsg->payload.u64 = | |
760 | ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | | |
5c93c473 | 761 | (1ULL << VHOST_F_LOG_ALL) | |
85ea9da5 | 762 | (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | |
5c93c473 VK |
763 | (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)); |
764 | ||
3595e2eb VK |
765 | vmsg->size = sizeof(vmsg->payload.u64); |
766 | ||
767 | DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
768 | ||
5c93c473 | 769 | /* Reply */ |
3595e2eb VK |
770 | return 1; |
771 | } | |
772 | ||
773 | static int | |
774 | vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
775 | { | |
776 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
a28c393c | 777 | |
5c93c473 | 778 | dev->features = vmsg->payload.u64; |
a28c393c VK |
779 | if ((dev->features & (1ULL << VIRTIO_F_VERSION_1)) || |
780 | (dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) { | |
781 | dev->hdrlen = 12; | |
782 | } else { | |
783 | dev->hdrlen = 10; | |
784 | } | |
785 | ||
3595e2eb VK |
786 | return 0; |
787 | } | |
788 | ||
789 | static int | |
790 | vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
791 | { | |
792 | return 0; | |
793 | } | |
794 | ||
5c93c473 VK |
795 | static void |
796 | vubr_close_log(VubrDev *dev) | |
797 | { | |
798 | if (dev->log_table) { | |
799 | if (munmap(dev->log_table, dev->log_size) != 0) { | |
800 | vubr_die("munmap()"); | |
801 | } | |
802 | ||
803 | dev->log_table = 0; | |
804 | } | |
805 | if (dev->log_call_fd != -1) { | |
806 | close(dev->log_call_fd); | |
807 | dev->log_call_fd = -1; | |
808 | } | |
809 | } | |
810 | ||
3595e2eb VK |
811 | static int |
812 | vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
813 | { | |
5c93c473 VK |
814 | vubr_close_log(dev); |
815 | dev->ready = 0; | |
816 | dev->features = 0; | |
3595e2eb VK |
817 | return 0; |
818 | } | |
819 | ||
820 | static int | |
821 | vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
822 | { | |
823 | int i; | |
824 | VhostUserMemory *memory = &vmsg->payload.memory; | |
825 | dev->nregions = memory->nregions; | |
826 | ||
827 | DPRINT("Nregions: %d\n", memory->nregions); | |
828 | for (i = 0; i < dev->nregions; i++) { | |
829 | void *mmap_addr; | |
830 | VhostUserMemoryRegion *msg_region = &memory->regions[i]; | |
831 | VubrDevRegion *dev_region = &dev->regions[i]; | |
832 | ||
833 | DPRINT("Region %d\n", i); | |
834 | DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n", | |
835 | msg_region->guest_phys_addr); | |
836 | DPRINT(" memory_size: 0x%016"PRIx64"\n", | |
837 | msg_region->memory_size); | |
838 | DPRINT(" userspace_addr 0x%016"PRIx64"\n", | |
839 | msg_region->userspace_addr); | |
840 | DPRINT(" mmap_offset 0x%016"PRIx64"\n", | |
841 | msg_region->mmap_offset); | |
842 | ||
5c93c473 VK |
843 | dev_region->gpa = msg_region->guest_phys_addr; |
844 | dev_region->size = msg_region->memory_size; | |
845 | dev_region->qva = msg_region->userspace_addr; | |
3595e2eb VK |
846 | dev_region->mmap_offset = msg_region->mmap_offset; |
847 | ||
848 | /* We don't use offset argument of mmap() since the | |
849 | * mapped address has to be page aligned, and we use huge | |
850 | * pages. */ | |
851 | mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset, | |
852 | PROT_READ | PROT_WRITE, MAP_SHARED, | |
853 | vmsg->fds[i], 0); | |
854 | ||
855 | if (mmap_addr == MAP_FAILED) { | |
856 | vubr_die("mmap"); | |
857 | } | |
5602b39f | 858 | dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr; |
3595e2eb | 859 | DPRINT(" mmap_addr: 0x%016"PRIx64"\n", dev_region->mmap_addr); |
6d0b908a VK |
860 | |
861 | close(vmsg->fds[i]); | |
3595e2eb VK |
862 | } |
863 | ||
864 | return 0; | |
865 | } | |
866 | ||
867 | static int | |
868 | vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
869 | { | |
5c93c473 VK |
870 | int fd; |
871 | uint64_t log_mmap_size, log_mmap_offset; | |
872 | void *rc; | |
873 | ||
874 | assert(vmsg->fd_num == 1); | |
875 | fd = vmsg->fds[0]; | |
876 | ||
877 | assert(vmsg->size == sizeof(vmsg->payload.log)); | |
878 | log_mmap_offset = vmsg->payload.log.mmap_offset; | |
879 | log_mmap_size = vmsg->payload.log.mmap_size; | |
880 | DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset); | |
881 | DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size); | |
882 | ||
883 | rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, | |
884 | log_mmap_offset); | |
885 | if (rc == MAP_FAILED) { | |
886 | vubr_die("mmap"); | |
887 | } | |
888 | dev->log_table = rc; | |
889 | dev->log_size = log_mmap_size; | |
890 | ||
891 | vmsg->size = sizeof(vmsg->payload.u64); | |
892 | /* Reply */ | |
893 | return 1; | |
3595e2eb VK |
894 | } |
895 | ||
896 | static int | |
897 | vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
898 | { | |
5c93c473 VK |
899 | assert(vmsg->fd_num == 1); |
900 | dev->log_call_fd = vmsg->fds[0]; | |
901 | DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]); | |
3595e2eb VK |
902 | return 0; |
903 | } | |
904 | ||
905 | static int | |
906 | vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
907 | { | |
908 | unsigned int index = vmsg->payload.state.index; | |
909 | unsigned int num = vmsg->payload.state.num; | |
910 | ||
911 | DPRINT("State.index: %d\n", index); | |
912 | DPRINT("State.num: %d\n", num); | |
913 | dev->vq[index].size = num; | |
914 | return 0; | |
915 | } | |
916 | ||
917 | static int | |
918 | vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
919 | { | |
920 | struct vhost_vring_addr *vra = &vmsg->payload.addr; | |
921 | unsigned int index = vra->index; | |
922 | VubrVirtq *vq = &dev->vq[index]; | |
923 | ||
924 | DPRINT("vhost_vring_addr:\n"); | |
925 | DPRINT(" index: %d\n", vra->index); | |
926 | DPRINT(" flags: %d\n", vra->flags); | |
927 | DPRINT(" desc_user_addr: 0x%016llx\n", vra->desc_user_addr); | |
928 | DPRINT(" used_user_addr: 0x%016llx\n", vra->used_user_addr); | |
929 | DPRINT(" avail_user_addr: 0x%016llx\n", vra->avail_user_addr); | |
930 | DPRINT(" log_guest_addr: 0x%016llx\n", vra->log_guest_addr); | |
931 | ||
5602b39f MT |
932 | vq->desc = (struct vring_desc *)(uintptr_t)qva_to_va(dev, vra->desc_user_addr); |
933 | vq->used = (struct vring_used *)(uintptr_t)qva_to_va(dev, vra->used_user_addr); | |
934 | vq->avail = (struct vring_avail *)(uintptr_t)qva_to_va(dev, vra->avail_user_addr); | |
5c93c473 | 935 | vq->log_guest_addr = vra->log_guest_addr; |
3595e2eb VK |
936 | |
937 | DPRINT("Setting virtq addresses:\n"); | |
938 | DPRINT(" vring_desc at %p\n", vq->desc); | |
939 | DPRINT(" vring_used at %p\n", vq->used); | |
940 | DPRINT(" vring_avail at %p\n", vq->avail); | |
941 | ||
942 | vq->last_used_index = vq->used->idx; | |
523b018d MAL |
943 | |
944 | if (vq->last_avail_index != vq->used->idx) { | |
945 | DPRINT("Last avail index != used index: %d != %d, resuming", | |
946 | vq->last_avail_index, vq->used->idx); | |
947 | vq->last_avail_index = vq->used->idx; | |
948 | } | |
949 | ||
3595e2eb VK |
950 | return 0; |
951 | } | |
952 | ||
953 | static int | |
954 | vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
955 | { | |
956 | unsigned int index = vmsg->payload.state.index; | |
957 | unsigned int num = vmsg->payload.state.num; | |
958 | ||
959 | DPRINT("State.index: %d\n", index); | |
960 | DPRINT("State.num: %d\n", num); | |
961 | dev->vq[index].last_avail_index = num; | |
962 | ||
963 | return 0; | |
964 | } | |
965 | ||
966 | static int | |
967 | vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
968 | { | |
5c93c473 VK |
969 | unsigned int index = vmsg->payload.state.index; |
970 | ||
971 | DPRINT("State.index: %d\n", index); | |
972 | vmsg->payload.state.num = dev->vq[index].last_avail_index; | |
973 | vmsg->size = sizeof(vmsg->payload.state); | |
974 | /* FIXME: this is a work-around for a bug in QEMU enabling | |
975 | * too early vrings. When protocol features are enabled, | |
976 | * we have to respect * VHOST_USER_SET_VRING_ENABLE request. */ | |
977 | dev->ready = 0; | |
978 | ||
6d0b908a VK |
979 | if (dev->vq[index].call_fd != -1) { |
980 | close(dev->vq[index].call_fd); | |
6d0b908a VK |
981 | dev->vq[index].call_fd = -1; |
982 | } | |
983 | if (dev->vq[index].kick_fd != -1) { | |
984 | close(dev->vq[index].kick_fd); | |
985 | dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd); | |
986 | dev->vq[index].kick_fd = -1; | |
987 | } | |
988 | ||
5c93c473 VK |
989 | /* Reply */ |
990 | return 1; | |
3595e2eb VK |
991 | } |
992 | ||
993 | static int | |
994 | vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
995 | { | |
996 | uint64_t u64_arg = vmsg->payload.u64; | |
997 | int index = u64_arg & VHOST_USER_VRING_IDX_MASK; | |
998 | ||
999 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
1000 | ||
1001 | assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0); | |
1002 | assert(vmsg->fd_num == 1); | |
1003 | ||
6d0b908a VK |
1004 | if (dev->vq[index].kick_fd != -1) { |
1005 | close(dev->vq[index].kick_fd); | |
1006 | dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd); | |
1007 | } | |
3595e2eb VK |
1008 | dev->vq[index].kick_fd = vmsg->fds[0]; |
1009 | DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index); | |
1010 | ||
1011 | if (index % 2 == 1) { | |
1012 | /* TX queue. */ | |
1013 | dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd, | |
1014 | dev, vubr_kick_cb); | |
1015 | ||
1016 | DPRINT("Waiting for kicks on fd: %d for vq: %d\n", | |
1017 | dev->vq[index].kick_fd, index); | |
1018 | } | |
5c93c473 VK |
1019 | /* We temporarily use this hack to determine that both TX and RX |
1020 | * queues are set up and ready for processing. | |
1021 | * FIXME: we need to rely in VHOST_USER_SET_VRING_ENABLE and | |
1022 | * actual kicks. */ | |
1023 | if (dev->vq[0].kick_fd != -1 && | |
1024 | dev->vq[1].kick_fd != -1) { | |
1025 | dev->ready = 1; | |
1026 | DPRINT("vhost-user-bridge is ready for processing queues.\n"); | |
1027 | } | |
3595e2eb | 1028 | return 0; |
5c93c473 | 1029 | |
3595e2eb VK |
1030 | } |
1031 | ||
1032 | static int | |
1033 | vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1034 | { | |
1035 | uint64_t u64_arg = vmsg->payload.u64; | |
1036 | int index = u64_arg & VHOST_USER_VRING_IDX_MASK; | |
1037 | ||
1038 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
1039 | assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0); | |
1040 | assert(vmsg->fd_num == 1); | |
1041 | ||
6d0b908a VK |
1042 | if (dev->vq[index].call_fd != -1) { |
1043 | close(dev->vq[index].call_fd); | |
6d0b908a | 1044 | } |
3595e2eb VK |
1045 | dev->vq[index].call_fd = vmsg->fds[0]; |
1046 | DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index); | |
1047 | ||
1048 | return 0; | |
1049 | } | |
1050 | ||
1051 | static int | |
1052 | vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1053 | { | |
1054 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
1055 | return 0; | |
1056 | } | |
1057 | ||
1058 | static int | |
1059 | vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1060 | { | |
5c93c473 | 1061 | vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD; |
3595e2eb | 1062 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); |
5c93c473 VK |
1063 | vmsg->size = sizeof(vmsg->payload.u64); |
1064 | ||
1065 | /* Reply */ | |
1066 | return 1; | |
3595e2eb VK |
1067 | } |
1068 | ||
1069 | static int | |
1070 | vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1071 | { | |
1072 | /* FIXME: unimplented */ | |
1073 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
1074 | return 0; | |
1075 | } | |
1076 | ||
1077 | static int | |
1078 | vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1079 | { | |
1080 | DPRINT("Function %s() not implemented yet.\n", __func__); | |
1081 | return 0; | |
1082 | } | |
1083 | ||
1084 | static int | |
1085 | vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1086 | { | |
5c93c473 VK |
1087 | unsigned int index = vmsg->payload.state.index; |
1088 | unsigned int enable = vmsg->payload.state.num; | |
1089 | ||
1090 | DPRINT("State.index: %d\n", index); | |
1091 | DPRINT("State.enable: %d\n", enable); | |
1092 | dev->vq[index].enable = enable; | |
3595e2eb VK |
1093 | return 0; |
1094 | } | |
1095 | ||
1096 | static int | |
1097 | vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1098 | { | |
1099 | DPRINT("Function %s() not implemented yet.\n", __func__); | |
1100 | return 0; | |
1101 | } | |
1102 | ||
1103 | static int | |
1104 | vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg) | |
1105 | { | |
1106 | /* Print out generic part of the request. */ | |
1107 | DPRINT( | |
1108 | "================== Vhost user message from QEMU ==================\n"); | |
1109 | DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request], | |
1110 | vmsg->request); | |
1111 | DPRINT("Flags: 0x%x\n", vmsg->flags); | |
1112 | DPRINT("Size: %d\n", vmsg->size); | |
1113 | ||
1114 | if (vmsg->fd_num) { | |
1115 | int i; | |
1116 | DPRINT("Fds:"); | |
1117 | for (i = 0; i < vmsg->fd_num; i++) { | |
1118 | DPRINT(" %d", vmsg->fds[i]); | |
1119 | } | |
1120 | DPRINT("\n"); | |
1121 | } | |
1122 | ||
1123 | switch (vmsg->request) { | |
1124 | case VHOST_USER_NONE: | |
1125 | return vubr_none_exec(dev, vmsg); | |
1126 | case VHOST_USER_GET_FEATURES: | |
1127 | return vubr_get_features_exec(dev, vmsg); | |
1128 | case VHOST_USER_SET_FEATURES: | |
1129 | return vubr_set_features_exec(dev, vmsg); | |
1130 | case VHOST_USER_SET_OWNER: | |
1131 | return vubr_set_owner_exec(dev, vmsg); | |
60915dc4 | 1132 | case VHOST_USER_RESET_OWNER: |
3595e2eb VK |
1133 | return vubr_reset_device_exec(dev, vmsg); |
1134 | case VHOST_USER_SET_MEM_TABLE: | |
1135 | return vubr_set_mem_table_exec(dev, vmsg); | |
1136 | case VHOST_USER_SET_LOG_BASE: | |
1137 | return vubr_set_log_base_exec(dev, vmsg); | |
1138 | case VHOST_USER_SET_LOG_FD: | |
1139 | return vubr_set_log_fd_exec(dev, vmsg); | |
1140 | case VHOST_USER_SET_VRING_NUM: | |
1141 | return vubr_set_vring_num_exec(dev, vmsg); | |
1142 | case VHOST_USER_SET_VRING_ADDR: | |
1143 | return vubr_set_vring_addr_exec(dev, vmsg); | |
1144 | case VHOST_USER_SET_VRING_BASE: | |
1145 | return vubr_set_vring_base_exec(dev, vmsg); | |
1146 | case VHOST_USER_GET_VRING_BASE: | |
1147 | return vubr_get_vring_base_exec(dev, vmsg); | |
1148 | case VHOST_USER_SET_VRING_KICK: | |
1149 | return vubr_set_vring_kick_exec(dev, vmsg); | |
1150 | case VHOST_USER_SET_VRING_CALL: | |
1151 | return vubr_set_vring_call_exec(dev, vmsg); | |
1152 | case VHOST_USER_SET_VRING_ERR: | |
1153 | return vubr_set_vring_err_exec(dev, vmsg); | |
1154 | case VHOST_USER_GET_PROTOCOL_FEATURES: | |
1155 | return vubr_get_protocol_features_exec(dev, vmsg); | |
1156 | case VHOST_USER_SET_PROTOCOL_FEATURES: | |
1157 | return vubr_set_protocol_features_exec(dev, vmsg); | |
1158 | case VHOST_USER_GET_QUEUE_NUM: | |
1159 | return vubr_get_queue_num_exec(dev, vmsg); | |
1160 | case VHOST_USER_SET_VRING_ENABLE: | |
1161 | return vubr_set_vring_enable_exec(dev, vmsg); | |
1162 | case VHOST_USER_SEND_RARP: | |
1163 | return vubr_send_rarp_exec(dev, vmsg); | |
1164 | ||
1165 | case VHOST_USER_MAX: | |
1166 | assert(vmsg->request != VHOST_USER_MAX); | |
1167 | } | |
1168 | return 0; | |
1169 | } | |
1170 | ||
1171 | static void | |
1172 | vubr_receive_cb(int sock, void *ctx) | |
1173 | { | |
1174 | VubrDev *dev = (VubrDev *) ctx; | |
1175 | VhostUserMsg vmsg; | |
1176 | int reply_requested; | |
1177 | ||
1178 | vubr_message_read(sock, &vmsg); | |
1179 | reply_requested = vubr_execute_request(dev, &vmsg); | |
1180 | if (reply_requested) { | |
1181 | /* Set the version in the flags when sending the reply */ | |
1182 | vmsg.flags &= ~VHOST_USER_VERSION_MASK; | |
1183 | vmsg.flags |= VHOST_USER_VERSION; | |
1184 | vmsg.flags |= VHOST_USER_REPLY_MASK; | |
1185 | vubr_message_write(sock, &vmsg); | |
1186 | } | |
1187 | } | |
1188 | ||
1189 | static void | |
1190 | vubr_accept_cb(int sock, void *ctx) | |
1191 | { | |
1192 | VubrDev *dev = (VubrDev *)ctx; | |
1193 | int conn_fd; | |
1194 | struct sockaddr_un un; | |
1195 | socklen_t len = sizeof(un); | |
1196 | ||
1197 | conn_fd = accept(sock, (struct sockaddr *) &un, &len); | |
5c93c473 | 1198 | if (conn_fd == -1) { |
3595e2eb VK |
1199 | vubr_die("accept()"); |
1200 | } | |
1201 | DPRINT("Got connection from remote peer on sock %d\n", conn_fd); | |
1202 | dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb); | |
98206d4e | 1203 | dispatcher_remove(&dev->dispatcher, sock); |
3595e2eb VK |
1204 | } |
1205 | ||
1206 | static VubrDev * | |
aef8486e | 1207 | vubr_new(const char *path, bool client) |
3595e2eb VK |
1208 | { |
1209 | VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev)); | |
1210 | dev->nregions = 0; | |
1211 | int i; | |
1212 | struct sockaddr_un un; | |
aef8486e | 1213 | CallbackFunc cb; |
3595e2eb VK |
1214 | size_t len; |
1215 | ||
1216 | for (i = 0; i < MAX_NR_VIRTQUEUE; i++) { | |
1217 | dev->vq[i] = (VubrVirtq) { | |
1218 | .call_fd = -1, .kick_fd = -1, | |
1219 | .size = 0, | |
1220 | .last_avail_index = 0, .last_used_index = 0, | |
1221 | .desc = 0, .avail = 0, .used = 0, | |
5c93c473 | 1222 | .enable = 0, |
3595e2eb VK |
1223 | }; |
1224 | } | |
1225 | ||
5c93c473 VK |
1226 | /* Init log */ |
1227 | dev->log_call_fd = -1; | |
1228 | dev->log_size = 0; | |
1229 | dev->log_table = 0; | |
1230 | dev->ready = 0; | |
1231 | dev->features = 0; | |
1232 | ||
3595e2eb VK |
1233 | /* Get a UNIX socket. */ |
1234 | dev->sock = socket(AF_UNIX, SOCK_STREAM, 0); | |
1235 | if (dev->sock == -1) { | |
1236 | vubr_die("socket"); | |
1237 | } | |
1238 | ||
1239 | un.sun_family = AF_UNIX; | |
1240 | strcpy(un.sun_path, path); | |
1241 | len = sizeof(un.sun_family) + strlen(path); | |
3595e2eb | 1242 | |
aef8486e MAL |
1243 | if (!client) { |
1244 | unlink(path); | |
3595e2eb | 1245 | |
aef8486e MAL |
1246 | if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) { |
1247 | vubr_die("bind"); | |
1248 | } | |
1249 | ||
1250 | if (listen(dev->sock, 1) == -1) { | |
1251 | vubr_die("listen"); | |
1252 | } | |
1253 | cb = vubr_accept_cb; | |
3595e2eb | 1254 | |
aef8486e MAL |
1255 | DPRINT("Waiting for connections on UNIX socket %s ...\n", path); |
1256 | } else { | |
1257 | if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) { | |
1258 | vubr_die("connect"); | |
1259 | } | |
1260 | cb = vubr_receive_cb; | |
3595e2eb VK |
1261 | } |
1262 | ||
1263 | dispatcher_init(&dev->dispatcher); | |
aef8486e | 1264 | dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb); |
3595e2eb | 1265 | |
3595e2eb VK |
1266 | return dev; |
1267 | } | |
1268 | ||
7cf32491 VK |
/*
 * Fill saddr->sin_addr from host.
 *
 * A host string starting with a digit is parsed as a numeric IPv4
 * address with inet_aton(); anything else is resolved with
 * gethostbyname(). Only IPv4 results of the expected length are
 * accepted.
 *
 * Prints a message to stderr and exits with status 1 on failure.
 */
static void
vubr_set_host(struct sockaddr_in *saddr, const char *host)
{
    /* <ctype.h> functions need an unsigned char value. */
    if (isdigit((unsigned char)host[0])) {
        if (!inet_aton(host, &saddr->sin_addr)) {
            fprintf(stderr, "inet_aton() failed.\n");
            exit(1);
        }
    } else {
        struct hostent *he = gethostbyname(host);

        if (!he || he->h_addrtype != AF_INET ||
            he->h_length != (int)sizeof(saddr->sin_addr) ||
            !he->h_addr_list[0]) {
            fprintf(stderr, "gethostbyname() failed.\n");
            exit(1);
        }
        /* Copy the bytes instead of dereferencing a cast pointer: the
         * address buffer is a plain char array. */
        memcpy(&saddr->sin_addr, he->h_addr_list[0], sizeof(saddr->sin_addr));
    }
}
1287 | ||
3595e2eb VK |
1288 | static void |
1289 | vubr_backend_udp_setup(VubrDev *dev, | |
1290 | const char *local_host, | |
7cf32491 VK |
1291 | const char *local_port, |
1292 | const char *remote_host, | |
1293 | const char *remote_port) | |
3595e2eb VK |
1294 | { |
1295 | int sock; | |
7cf32491 VK |
1296 | const char *r; |
1297 | ||
1298 | int lport, rport; | |
1299 | ||
1300 | lport = strtol(local_port, (char **)&r, 0); | |
1301 | if (r == local_port) { | |
1302 | fprintf(stderr, "lport parsing failed.\n"); | |
1303 | exit(1); | |
1304 | } | |
1305 | ||
1306 | rport = strtol(remote_port, (char **)&r, 0); | |
1307 | if (r == remote_port) { | |
1308 | fprintf(stderr, "rport parsing failed.\n"); | |
1309 | exit(1); | |
1310 | } | |
1311 | ||
3595e2eb VK |
1312 | struct sockaddr_in si_local = { |
1313 | .sin_family = AF_INET, | |
7cf32491 | 1314 | .sin_port = htons(lport), |
3595e2eb VK |
1315 | }; |
1316 | ||
7cf32491 | 1317 | vubr_set_host(&si_local, local_host); |
3595e2eb VK |
1318 | |
1319 | /* setup destination for sends */ | |
1320 | dev->backend_udp_dest = (struct sockaddr_in) { | |
1321 | .sin_family = AF_INET, | |
7cf32491 | 1322 | .sin_port = htons(rport), |
3595e2eb | 1323 | }; |
7cf32491 | 1324 | vubr_set_host(&dev->backend_udp_dest, remote_host); |
3595e2eb VK |
1325 | |
1326 | sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); | |
1327 | if (sock == -1) { | |
1328 | vubr_die("socket"); | |
1329 | } | |
1330 | ||
1331 | if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) { | |
1332 | vubr_die("bind"); | |
1333 | } | |
1334 | ||
1335 | dev->backend_udp_sock = sock; | |
1336 | dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb); | |
1337 | DPRINT("Waiting for data from udp backend on %s:%d...\n", | |
7cf32491 | 1338 | local_host, lport); |
3595e2eb VK |
1339 | } |
1340 | ||
1341 | static void | |
1342 | vubr_run(VubrDev *dev) | |
1343 | { | |
1344 | while (1) { | |
1345 | /* timeout 200ms */ | |
1346 | dispatcher_wait(&dev->dispatcher, 200000); | |
1347 | /* Here one can try polling strategy. */ | |
1348 | } | |
1349 | } | |
1350 | ||
7cf32491 VK |
/*
 * Split a "host:port" string at its first ':'.
 *
 * On success *host and *port are set to newly allocated copies of the
 * two parts (owned by the caller) and 0 is returned. buf itself is not
 * modified.
 *
 * Returns -1, leaving *host and *port untouched, when buf contains no
 * ':' or memory cannot be allocated.
 */
static int
vubr_parse_host_port(const char **host, const char **port, const char *buf)
{
    const char *sep = strchr(buf, ':');
    size_t host_len, port_len;
    char *h, *p;

    if (!sep) {
        return -1;
    }

    host_len = (size_t)(sep - buf);
    port_len = strlen(sep + 1);

    h = malloc(host_len + 1);
    p = malloc(port_len + 1);
    if (!h || !p) {
        free(h);
        free(p);
        return -1;
    }

    memcpy(h, buf, host_len);
    h[host_len] = '\0';
    /* port_len + 1 also copies the terminating NUL. */
    memcpy(p, sep + 1, port_len + 1);

    *host = h;
    *port = p;
    return 0;
}
1364 | ||
/* Built-in defaults, used when the matching command line option is absent. */
#define DEFAULT_UD_SOCKET "/tmp/vubr.sock"
#define DEFAULT_LHOST "127.0.0.1"
#define DEFAULT_LPORT "4444"
#define DEFAULT_RHOST "127.0.0.1"
#define DEFAULT_RPORT "5555"

/* Effective settings; main() overrides them from the command line. */
static const char *ud_socket_path = DEFAULT_UD_SOCKET;  /* -u */
static const char *lhost = DEFAULT_LHOST;               /* -l host part */
static const char *lport = DEFAULT_LPORT;               /* -l port part */
static const char *rhost = DEFAULT_RHOST;               /* -r host part */
static const char *rport = DEFAULT_RPORT;               /* -r port part */
1376 | ||
3595e2eb VK |
1377 | int |
1378 | main(int argc, char *argv[]) | |
1379 | { | |
1380 | VubrDev *dev; | |
7cf32491 | 1381 | int opt; |
aef8486e | 1382 | bool client = false; |
3595e2eb | 1383 | |
aef8486e | 1384 | while ((opt = getopt(argc, argv, "l:r:u:c")) != -1) { |
7cf32491 VK |
1385 | |
1386 | switch (opt) { | |
1387 | case 'l': | |
1388 | if (vubr_parse_host_port(&lhost, &lport, optarg) < 0) { | |
1389 | goto out; | |
1390 | } | |
1391 | break; | |
1392 | case 'r': | |
1393 | if (vubr_parse_host_port(&rhost, &rport, optarg) < 0) { | |
1394 | goto out; | |
1395 | } | |
1396 | break; | |
1397 | case 'u': | |
1398 | ud_socket_path = strdup(optarg); | |
1399 | break; | |
aef8486e MAL |
1400 | case 'c': |
1401 | client = true; | |
1402 | break; | |
7cf32491 VK |
1403 | default: |
1404 | goto out; | |
1405 | } | |
1406 | } | |
1407 | ||
aef8486e MAL |
1408 | DPRINT("ud socket: %s (%s)\n", ud_socket_path, |
1409 | client ? "client" : "server"); | |
7cf32491 VK |
1410 | DPRINT("local: %s:%s\n", lhost, lport); |
1411 | DPRINT("remote: %s:%s\n", rhost, rport); | |
1412 | ||
aef8486e | 1413 | dev = vubr_new(ud_socket_path, client); |
3595e2eb VK |
1414 | if (!dev) { |
1415 | return 1; | |
1416 | } | |
1417 | ||
7cf32491 | 1418 | vubr_backend_udp_setup(dev, lhost, lport, rhost, rport); |
3595e2eb VK |
1419 | vubr_run(dev); |
1420 | return 0; | |
7cf32491 VK |
1421 | |
1422 | out: | |
1423 | fprintf(stderr, "Usage: %s ", argv[0]); | |
aef8486e | 1424 | fprintf(stderr, "[-c] [-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n"); |
7cf32491 VK |
1425 | fprintf(stderr, "\t-u path to unix doman socket. default: %s\n", |
1426 | DEFAULT_UD_SOCKET); | |
1427 | fprintf(stderr, "\t-l local host and port. default: %s:%s\n", | |
1428 | DEFAULT_LHOST, DEFAULT_LPORT); | |
1429 | fprintf(stderr, "\t-r remote host and port. default: %s:%s\n", | |
1430 | DEFAULT_RHOST, DEFAULT_RPORT); | |
aef8486e | 1431 | fprintf(stderr, "\t-c client mode\n"); |
7cf32491 VK |
1432 | |
1433 | return 1; | |
3595e2eb | 1434 | } |