]>
Commit | Line | Data |
---|---|---|
3595e2eb VK |
1 | /* |
2 | * Vhost User Bridge | |
3 | * | |
4 | * Copyright (c) 2015 Red Hat, Inc. | |
5 | * | |
6 | * Authors: | |
7 | * Victor Kaplansky <victork@redhat.com> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or | |
10 | * later. See the COPYING file in the top-level directory. | |
11 | */ | |
12 | ||
13 | /* | |
14 | * TODO: | |
15 | * - main should get parameters from the command line. | |
5c93c473 VK |
16 | * - implement all request handlers. Still not implemented: |
17 | * vubr_get_queue_num_exec() | |
18 | * vubr_send_rarp_exec() | |
3595e2eb VK |
19 | * - test for broken requests and virtqueue. |
20 | * - implement features defined by Virtio 1.0 spec. | |
21 | * - support mergeable buffers and indirect descriptors. | |
3595e2eb VK |
22 | * - implement clean shutdown. |
23 | * - implement non-blocking writes to UDP backend. | |
24 | * - implement polling strategy. | |
5c93c473 VK |
25 | * - implement clean starting/stopping of vq processing |
26 | * - implement clean starting/stopping of used and buffers | |
27 | * dirty page logging. | |
3595e2eb VK |
28 | */ |
29 | ||
5c93c473 VK |
30 | #define _FILE_OFFSET_BITS 64 |
31 | ||
3595e2eb VK |
32 | #include <stddef.h> |
33 | #include <assert.h> | |
34 | #include <stdio.h> | |
35 | #include <stdlib.h> | |
36 | #include <stdint.h> | |
37 | #include <inttypes.h> | |
38 | #include <string.h> | |
39 | #include <unistd.h> | |
40 | #include <errno.h> | |
41 | #include <sys/types.h> | |
42 | #include <sys/socket.h> | |
43 | #include <sys/un.h> | |
44 | #include <sys/unistd.h> | |
45 | #include <sys/mman.h> | |
46 | #include <sys/eventfd.h> | |
47 | #include <arpa/inet.h> | |
7cf32491 VK |
48 | #include <ctype.h> |
49 | #include <netdb.h> | |
3595e2eb VK |
50 | |
51 | #include <linux/vhost.h> | |
52 | ||
53 | #include "qemu/atomic.h" | |
54 | #include "standard-headers/linux/virtio_net.h" | |
55 | #include "standard-headers/linux/virtio_ring.h" | |
56 | ||
57 | #define VHOST_USER_BRIDGE_DEBUG 1 | |
58 | ||
59 | #define DPRINT(...) \ | |
60 | do { \ | |
61 | if (VHOST_USER_BRIDGE_DEBUG) { \ | |
62 | printf(__VA_ARGS__); \ | |
63 | } \ | |
64 | } while (0) | |
65 | ||
66 | typedef void (*CallbackFunc)(int sock, void *ctx); | |
67 | ||
68 | typedef struct Event { | |
69 | void *ctx; | |
70 | CallbackFunc callback; | |
71 | } Event; | |
72 | ||
73 | typedef struct Dispatcher { | |
74 | int max_sock; | |
75 | fd_set fdset; | |
76 | Event events[FD_SETSIZE]; | |
77 | } Dispatcher; | |
78 | ||
/* Print the failing call's name plus strerror(errno) and terminate.
 * Catch-all fatal error handler for the bridge; never returns. */
static void
vubr_die(const char *s)
{
    perror(s);
    exit(1);
}
85 | ||
86 | static int | |
87 | dispatcher_init(Dispatcher *dispr) | |
88 | { | |
89 | FD_ZERO(&dispr->fdset); | |
90 | dispr->max_sock = -1; | |
91 | return 0; | |
92 | } | |
93 | ||
94 | static int | |
95 | dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb) | |
96 | { | |
97 | if (sock >= FD_SETSIZE) { | |
98 | fprintf(stderr, | |
99 | "Error: Failed to add new event. sock %d should be less than %d\n", | |
100 | sock, FD_SETSIZE); | |
101 | return -1; | |
102 | } | |
103 | ||
104 | dispr->events[sock].ctx = ctx; | |
105 | dispr->events[sock].callback = cb; | |
106 | ||
107 | FD_SET(sock, &dispr->fdset); | |
108 | if (sock > dispr->max_sock) { | |
109 | dispr->max_sock = sock; | |
110 | } | |
111 | DPRINT("Added sock %d for watching. max_sock: %d\n", | |
112 | sock, dispr->max_sock); | |
113 | return 0; | |
114 | } | |
115 | ||
#if 0
/* dispatcher_remove() is not currently in use but may be useful
 * in the future. */
/* Stop watching 'sock'.  Note that max_sock is deliberately not
 * shrunk, so select() keeps scanning up to the old maximum. */
static int
dispatcher_remove(Dispatcher *dispr, int sock)
{
    if (sock >= FD_SETSIZE) {
        fprintf(stderr,
            "Error: Failed to remove event. sock %d should be less than %d\n",
            sock, FD_SETSIZE);
        return -1;
    }

    FD_CLR(sock, &dispr->fdset);
    return 0;
}
#endif
133 | ||
134 | /* timeout in us */ | |
135 | static int | |
136 | dispatcher_wait(Dispatcher *dispr, uint32_t timeout) | |
137 | { | |
138 | struct timeval tv; | |
139 | tv.tv_sec = timeout / 1000000; | |
140 | tv.tv_usec = timeout % 1000000; | |
141 | ||
142 | fd_set fdset = dispr->fdset; | |
143 | ||
144 | /* wait until some of sockets become readable. */ | |
145 | int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv); | |
146 | ||
147 | if (rc == -1) { | |
148 | vubr_die("select"); | |
149 | } | |
150 | ||
151 | /* Timeout */ | |
152 | if (rc == 0) { | |
153 | return 0; | |
154 | } | |
155 | ||
156 | /* Now call callback for every ready socket. */ | |
157 | ||
158 | int sock; | |
159 | for (sock = 0; sock < dispr->max_sock + 1; sock++) | |
160 | if (FD_ISSET(sock, &fdset)) { | |
161 | Event *e = &dispr->events[sock]; | |
162 | e->callback(sock, e->ctx); | |
163 | } | |
164 | ||
165 | return 0; | |
166 | } | |
167 | ||
/* Per-virtqueue state tracked by the bridge. */
typedef struct VubrVirtq {
    int call_fd;               /* eventfd used to interrupt the guest */
    int kick_fd;               /* eventfd the master kicks us on */
    uint32_t size;             /* number of descriptors in the ring */
    uint16_t last_avail_index; /* next avail-ring slot we will consume */
    uint16_t last_used_index;  /* next used-ring slot we will fill */
    struct vring_desc *desc;   /* descriptor table (our VA) */
    struct vring_avail *avail; /* avail ring (our VA) */
    struct vring_used *used;   /* used ring (our VA) */
    uint64_t log_guest_addr;   /* guest phys addr of used ring, for dirty log */
    int enable;                /* set by VHOST_USER_SET_VRING_ENABLE */
} VubrVirtq;
180 | ||
181 | /* Based on qemu/hw/virtio/vhost-user.c */ | |
182 | ||
183 | #define VHOST_MEMORY_MAX_NREGIONS 8 | |
184 | #define VHOST_USER_F_PROTOCOL_FEATURES 30 | |
185 | ||
5c93c473 VK |
186 | #define VHOST_LOG_PAGE 4096 |
187 | ||
3595e2eb VK |
188 | enum VhostUserProtocolFeature { |
189 | VHOST_USER_PROTOCOL_F_MQ = 0, | |
190 | VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, | |
191 | VHOST_USER_PROTOCOL_F_RARP = 2, | |
192 | ||
193 | VHOST_USER_PROTOCOL_F_MAX | |
194 | }; | |
195 | ||
196 | #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) | |
197 | ||
198 | typedef enum VhostUserRequest { | |
199 | VHOST_USER_NONE = 0, | |
200 | VHOST_USER_GET_FEATURES = 1, | |
201 | VHOST_USER_SET_FEATURES = 2, | |
202 | VHOST_USER_SET_OWNER = 3, | |
60915dc4 | 203 | VHOST_USER_RESET_OWNER = 4, |
3595e2eb VK |
204 | VHOST_USER_SET_MEM_TABLE = 5, |
205 | VHOST_USER_SET_LOG_BASE = 6, | |
206 | VHOST_USER_SET_LOG_FD = 7, | |
207 | VHOST_USER_SET_VRING_NUM = 8, | |
208 | VHOST_USER_SET_VRING_ADDR = 9, | |
209 | VHOST_USER_SET_VRING_BASE = 10, | |
210 | VHOST_USER_GET_VRING_BASE = 11, | |
211 | VHOST_USER_SET_VRING_KICK = 12, | |
212 | VHOST_USER_SET_VRING_CALL = 13, | |
213 | VHOST_USER_SET_VRING_ERR = 14, | |
214 | VHOST_USER_GET_PROTOCOL_FEATURES = 15, | |
215 | VHOST_USER_SET_PROTOCOL_FEATURES = 16, | |
216 | VHOST_USER_GET_QUEUE_NUM = 17, | |
217 | VHOST_USER_SET_VRING_ENABLE = 18, | |
218 | VHOST_USER_SEND_RARP = 19, | |
219 | VHOST_USER_MAX | |
220 | } VhostUserRequest; | |
221 | ||
222 | typedef struct VhostUserMemoryRegion { | |
223 | uint64_t guest_phys_addr; | |
224 | uint64_t memory_size; | |
225 | uint64_t userspace_addr; | |
226 | uint64_t mmap_offset; | |
227 | } VhostUserMemoryRegion; | |
228 | ||
229 | typedef struct VhostUserMemory { | |
230 | uint32_t nregions; | |
231 | uint32_t padding; | |
232 | VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; | |
233 | } VhostUserMemory; | |
234 | ||
5c93c473 VK |
235 | typedef struct VhostUserLog { |
236 | uint64_t mmap_size; | |
237 | uint64_t mmap_offset; | |
238 | } VhostUserLog; | |
239 | ||
3595e2eb VK |
240 | typedef struct VhostUserMsg { |
241 | VhostUserRequest request; | |
242 | ||
243 | #define VHOST_USER_VERSION_MASK (0x3) | |
244 | #define VHOST_USER_REPLY_MASK (0x1<<2) | |
245 | uint32_t flags; | |
246 | uint32_t size; /* the following payload size */ | |
247 | union { | |
248 | #define VHOST_USER_VRING_IDX_MASK (0xff) | |
249 | #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) | |
250 | uint64_t u64; | |
251 | struct vhost_vring_state state; | |
252 | struct vhost_vring_addr addr; | |
253 | VhostUserMemory memory; | |
5c93c473 | 254 | VhostUserLog log; |
3595e2eb VK |
255 | } payload; |
256 | int fds[VHOST_MEMORY_MAX_NREGIONS]; | |
257 | int fd_num; | |
258 | } QEMU_PACKED VhostUserMsg; | |
259 | ||
260 | #define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) | |
261 | ||
262 | /* The version of the protocol we support */ | |
263 | #define VHOST_USER_VERSION (0x1) | |
264 | ||
265 | #define MAX_NR_VIRTQUEUE (8) | |
266 | ||
267 | typedef struct VubrDevRegion { | |
268 | /* Guest Physical address. */ | |
269 | uint64_t gpa; | |
270 | /* Memory region size. */ | |
271 | uint64_t size; | |
272 | /* QEMU virtual address (userspace). */ | |
273 | uint64_t qva; | |
274 | /* Starting offset in our mmaped space. */ | |
275 | uint64_t mmap_offset; | |
276 | /* Start address of mmaped space. */ | |
277 | uint64_t mmap_addr; | |
278 | } VubrDevRegion; | |
279 | ||
/* Whole-device state of the vhost-user bridge. */
typedef struct VubrDev {
    int sock;                  /* vhost-user control socket */
    Dispatcher dispatcher;     /* fd -> callback multiplexer */
    uint32_t nregions;         /* number of valid entries in regions[] */
    VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
    VubrVirtq vq[MAX_NR_VIRTQUEUE];  /* vq[0] is RX, vq[1] is TX */
    int log_call_fd;           /* eventfd to notify QEMU of log updates */
    uint64_t log_size;         /* size of the mapped dirty-log bitmap */
    uint8_t *log_table;        /* mmap'ed dirty-log bitmap, or NULL */
    int backend_udp_sock;      /* datagram socket towards the backend */
    struct sockaddr_in backend_udp_dest;  /* peer we forward frames to */
    int ready;                 /* set once both RX and TX kick fds arrived */
    uint64_t features;         /* feature bits acked via SET_FEATURES */
} VubrDev;
294 | ||
295 | static const char *vubr_request_str[] = { | |
296 | [VHOST_USER_NONE] = "VHOST_USER_NONE", | |
297 | [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", | |
298 | [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", | |
299 | [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", | |
60915dc4 | 300 | [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", |
3595e2eb VK |
301 | [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", |
302 | [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE", | |
303 | [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD", | |
304 | [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", | |
305 | [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", | |
306 | [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", | |
307 | [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", | |
308 | [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", | |
309 | [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", | |
310 | [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR", | |
311 | [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES", | |
312 | [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", | |
313 | [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", | |
314 | [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", | |
315 | [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP", | |
316 | [VHOST_USER_MAX] = "VHOST_USER_MAX", | |
317 | }; | |
318 | ||
/* Hex-dump 'len' bytes of 'buf' to stdout, 16 bytes per row in groups
 * of four, followed by a separator line. */
static void
print_buffer(uint8_t *buf, size_t len)
{
    /* Was 'int': comparing against the size_t 'len' triggers
     * -Wsign-compare and misbehaves for buffers larger than INT_MAX. */
    size_t i;
    printf("Raw buffer:\n");
    for (i = 0; i < len; i++) {
        if (i % 16 == 0) {
            printf("\n");
        }
        if (i % 4 == 0) {
            printf(" ");
        }
        printf("%02x ", buf[i]);
    }
    printf("\n............................................................\n");
}
335 | ||
336 | /* Translate guest physical address to our virtual address. */ | |
337 | static uint64_t | |
338 | gpa_to_va(VubrDev *dev, uint64_t guest_addr) | |
339 | { | |
340 | int i; | |
341 | ||
342 | /* Find matching memory region. */ | |
343 | for (i = 0; i < dev->nregions; i++) { | |
344 | VubrDevRegion *r = &dev->regions[i]; | |
345 | ||
346 | if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) { | |
347 | return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset; | |
348 | } | |
349 | } | |
350 | ||
351 | assert(!"address not found in regions"); | |
352 | return 0; | |
353 | } | |
354 | ||
355 | /* Translate qemu virtual address to our virtual address. */ | |
356 | static uint64_t | |
357 | qva_to_va(VubrDev *dev, uint64_t qemu_addr) | |
358 | { | |
359 | int i; | |
360 | ||
361 | /* Find matching memory region. */ | |
362 | for (i = 0; i < dev->nregions; i++) { | |
363 | VubrDevRegion *r = &dev->regions[i]; | |
364 | ||
365 | if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) { | |
366 | return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset; | |
367 | } | |
368 | } | |
369 | ||
370 | assert(!"address not found in regions"); | |
371 | return 0; | |
372 | } | |
373 | ||
374 | static void | |
375 | vubr_message_read(int conn_fd, VhostUserMsg *vmsg) | |
376 | { | |
377 | char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { }; | |
378 | struct iovec iov = { | |
379 | .iov_base = (char *)vmsg, | |
380 | .iov_len = VHOST_USER_HDR_SIZE, | |
381 | }; | |
382 | struct msghdr msg = { | |
383 | .msg_iov = &iov, | |
384 | .msg_iovlen = 1, | |
385 | .msg_control = control, | |
386 | .msg_controllen = sizeof(control), | |
387 | }; | |
388 | size_t fd_size; | |
389 | struct cmsghdr *cmsg; | |
390 | int rc; | |
391 | ||
392 | rc = recvmsg(conn_fd, &msg, 0); | |
393 | ||
5c93c473 VK |
394 | if (rc == 0) { |
395 | vubr_die("recvmsg"); | |
396 | fprintf(stderr, "Peer disconnected.\n"); | |
397 | exit(1); | |
398 | } | |
399 | if (rc < 0) { | |
3595e2eb VK |
400 | vubr_die("recvmsg"); |
401 | } | |
402 | ||
403 | vmsg->fd_num = 0; | |
404 | for (cmsg = CMSG_FIRSTHDR(&msg); | |
405 | cmsg != NULL; | |
406 | cmsg = CMSG_NXTHDR(&msg, cmsg)) | |
407 | { | |
408 | if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { | |
409 | fd_size = cmsg->cmsg_len - CMSG_LEN(0); | |
410 | vmsg->fd_num = fd_size / sizeof(int); | |
411 | memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size); | |
412 | break; | |
413 | } | |
414 | } | |
415 | ||
416 | if (vmsg->size > sizeof(vmsg->payload)) { | |
417 | fprintf(stderr, | |
418 | "Error: too big message request: %d, size: vmsg->size: %u, " | |
419 | "while sizeof(vmsg->payload) = %lu\n", | |
420 | vmsg->request, vmsg->size, sizeof(vmsg->payload)); | |
421 | exit(1); | |
422 | } | |
423 | ||
424 | if (vmsg->size) { | |
425 | rc = read(conn_fd, &vmsg->payload, vmsg->size); | |
5c93c473 VK |
426 | if (rc == 0) { |
427 | vubr_die("recvmsg"); | |
428 | fprintf(stderr, "Peer disconnected.\n"); | |
429 | exit(1); | |
430 | } | |
431 | if (rc < 0) { | |
3595e2eb VK |
432 | vubr_die("recvmsg"); |
433 | } | |
434 | ||
435 | assert(rc == vmsg->size); | |
436 | } | |
437 | } | |
438 | ||
/* Send the header plus 'size' payload bytes of 'vmsg' to the master.
 * Retries on EINTR; any other error is fatal.
 * NOTE(review): a short write is not resumed -- presumably fine on a
 * blocking AF_UNIX socket, but worth confirming. */
static void
vubr_message_write(int conn_fd, VhostUserMsg *vmsg)
{
    int rc;

    do {
        rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size);
    } while (rc < 0 && errno == EINTR);

    if (rc < 0) {
        vubr_die("write");
    }
}
452 | ||
/* Forward 'len' raw bytes to the UDP backend peer; fatal on error. */
static void
vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len)
{
    int slen = sizeof(struct sockaddr_in);

    if (sendto(dev->backend_udp_sock, buf, len, 0,
               (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) {
        vubr_die("sendto()");
    }
}
463 | ||
464 | static int | |
465 | vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen) | |
466 | { | |
467 | int slen = sizeof(struct sockaddr_in); | |
468 | int rc; | |
469 | ||
470 | rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0, | |
471 | (struct sockaddr *) &dev->backend_udp_dest, | |
472 | (socklen_t *)&slen); | |
473 | if (rc == -1) { | |
474 | vubr_die("recvfrom()"); | |
475 | } | |
476 | ||
477 | return rc; | |
478 | } | |
479 | ||
/* Strip the virtio-net header from a guest TX packet and forward the
 * remaining frame to the UDP backend.
 * NOTE(review): assumes len >= sizeof(struct virtio_net_hdr_v1);
 * verify that the descriptor-processing caller guarantees this. */
static void
vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
{
    int hdrlen = sizeof(struct virtio_net_hdr_v1);

    if (VHOST_USER_BRIDGE_DEBUG) {
        print_buffer(buf, len);
    }
    vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
}
490 | ||
5c93c473 VK |
/* Kick the log_call_fd if required. */
/* Signals QEMU (via the eventfd from SET_LOG_FD) that the dirty-log
 * bitmap has been updated; no-op if no log fd was installed. */
static void
vubr_log_kick(VubrDev *dev)
{
    if (dev->log_call_fd != -1) {
        DPRINT("Kicking the QEMU's log...\n");
        eventfd_write(dev->log_call_fd, 1);
    }
}
500 | ||
3595e2eb VK |
/* Kick the guest if necessary. */
/* Writes the call eventfd unless the guest suppressed interrupts via
 * VRING_AVAIL_F_NO_INTERRUPT. */
static void
vubr_virtqueue_kick(VubrVirtq *vq)
{
    if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
        DPRINT("Kicking the guest...\n");
        eventfd_write(vq->call_fd, 1);
    }
}
510 | ||
5c93c473 VK |
/* Set the dirty bit for one guest page in the shared log bitmap
 * (one bit per VHOST_LOG_PAGE-sized page; atomic_or because QEMU
 * accesses the bitmap concurrently). */
static void
vubr_log_page(uint8_t *log_table, uint64_t page)
{
    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
    atomic_or(&log_table[page / 8], 1 << (page % 8));
}
517 | ||
518 | static void | |
519 | vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length) | |
520 | { | |
521 | uint64_t page; | |
522 | ||
523 | if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) || | |
524 | !dev->log_table || !length) { | |
525 | return; | |
526 | } | |
527 | ||
528 | assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8)); | |
529 | ||
530 | page = address / VHOST_LOG_PAGE; | |
531 | while (page * VHOST_LOG_PAGE < address + length) { | |
532 | vubr_log_page(dev->log_table, page); | |
533 | page += VHOST_LOG_PAGE; | |
534 | } | |
535 | vubr_log_kick(dev); | |
536 | } | |
537 | ||
3595e2eb VK |
/* Copy 'len' bytes from 'buf' into the next available descriptor of
 * 'vq' (the RX queue), mark it used, log the dirtied guest memory for
 * migration, and kick the guest.  The caller must already have
 * verified that a descriptor is available (asserted below).
 * Only a single direct writable descriptor is supported; over-long
 * packets are dropped (see the FIXMEs and the file-top TODO list). */
static void
vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
{
    struct vring_desc *desc = vq->desc;
    struct vring_avail *avail = vq->avail;
    struct vring_used *used = vq->used;
    uint64_t log_guest_addr = vq->log_guest_addr;

    unsigned int size = vq->size;

    uint16_t avail_index = atomic_mb_read(&avail->idx);

    /* We check the available descriptors before posting the
     * buffer, so here we assume that enough available
     * descriptors. */
    assert(vq->last_avail_index != avail_index);
    uint16_t a_index = vq->last_avail_index % size;
    uint16_t u_index = vq->last_used_index % size;
    uint16_t d_index = avail->ring[a_index];

    int i = d_index;

    DPRINT("Post packet to guest on vq:\n");
    DPRINT("    size = %d\n", vq->size);
    DPRINT("    last_avail_index = %d\n", vq->last_avail_index);
    DPRINT("    last_used_index = %d\n", vq->last_used_index);
    DPRINT("    a_index = %d\n", a_index);
    DPRINT("    u_index = %d\n", u_index);
    DPRINT("    d_index = %d\n", d_index);
    DPRINT("    desc[%d].addr = 0x%016"PRIx64"\n", i, desc[i].addr);
    DPRINT("    desc[%d].len = %d\n", i, desc[i].len);
    DPRINT("    desc[%d].flags = %d\n", i, desc[i].flags);
    DPRINT("    avail->idx = %d\n", avail_index);
    DPRINT("    used->idx = %d\n", used->idx);

    if (!(desc[i].flags & VRING_DESC_F_WRITE)) {
        /* FIXME: we should find writable descriptor. */
        fprintf(stderr, "Error: descriptor is not writable. Exiting.\n");
        exit(1);
    }

    void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
    uint32_t chunk_len = desc[i].len;

    if (len <= chunk_len) {
        memcpy(chunk_start, buf, len);
        /* The guest buffer we just wrote is dirty. */
        vubr_log_write(dev, desc[i].addr, len);
    } else {
        fprintf(stderr,
                "Received too long packet from the backend. Dropping...\n");
        return;
    }

    /* Add descriptor to the used ring. */
    used->ring[u_index].id = d_index;
    used->ring[u_index].len = len;
    /* The used-ring entry lives in guest memory too: log it at its
     * guest-physical location (log_guest_addr). */
    vubr_log_write(dev,
                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
                   sizeof(used->ring[u_index]));

    vq->last_avail_index++;
    vq->last_used_index++;

    /* Publish the new used index only after the entry is written. */
    atomic_mb_set(&used->idx, vq->last_used_index);
    vubr_log_write(dev,
                   log_guest_addr + offsetof(struct vring_used, idx),
                   sizeof(used->idx));

    /* Kick the guest if necessary. */
    vubr_virtqueue_kick(vq);
}
609 | ||
/* Consume one available TX descriptor chain from 'vq': gather its
 * chunks into a local 4 KiB buffer, place the chain on the used ring
 * (logging the dirtied used-ring entry) and hand the packet to the
 * backend.  Returns 0 on success, -1 for an empty chain.
 * Note: the caller advances last_avail/last_used and publishes
 * used->idx; this function only fills the used-ring slot. */
static int
vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
{
    struct vring_desc *desc = vq->desc;
    struct vring_avail *avail = vq->avail;
    struct vring_used *used = vq->used;
    uint64_t log_guest_addr = vq->log_guest_addr;

    unsigned int size = vq->size;

    uint16_t a_index = vq->last_avail_index % size;
    uint16_t u_index = vq->last_used_index % size;
    uint16_t d_index = avail->ring[a_index];

    uint32_t i, len = 0;
    size_t buf_size = 4096;
    uint8_t buf[4096];

    DPRINT("Chunks: ");
    i = d_index;
    do {
        void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
        uint32_t chunk_len = desc[i].len;

        /* TX descriptors must be device-readable, not writable. */
        assert(!(desc[i].flags & VRING_DESC_F_WRITE));

        if (len + chunk_len < buf_size) {
            memcpy(buf + len, chunk_start, chunk_len);
            DPRINT("%d ", chunk_len);
        } else {
            fprintf(stderr, "Error: too long packet. Dropping...\n");
            break;
        }

        len += chunk_len;

        if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
            break;
        }

        /* Follow the chain to the next descriptor. */
        i = desc[i].next;
    } while (1);
    DPRINT("\n");

    if (!len) {
        return -1;
    }

    /* Add descriptor to the used ring. */
    used->ring[u_index].id = d_index;
    used->ring[u_index].len = len;
    /* Used-ring entry is guest memory: log it for migration. */
    vubr_log_write(dev,
                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
                   sizeof(used->ring[u_index]));

    vubr_consume_raw_packet(dev, buf, len);

    return 0;
}
669 | ||
/* Drain every available descriptor chain from 'vq', then publish the
 * new used index (with a barrier) and log the dirtied index field.
 * NOTE(review): vubr_process_desc()'s -1 (empty chain) return is
 * ignored; the indices advance regardless. */
static void
vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
{
    struct vring_avail *avail = vq->avail;
    struct vring_used *used = vq->used;
    uint64_t log_guest_addr = vq->log_guest_addr;

    while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
        vubr_process_desc(dev, vq);
        vq->last_avail_index++;
        vq->last_used_index++;
    }

    atomic_mb_set(&used->idx, vq->last_used_index);
    vubr_log_write(dev,
                   log_guest_addr + offsetof(struct vring_used, idx),
                   sizeof(used->idx));
}
688 | ||
/* Dispatcher callback for the backend UDP socket: prepend a
 * virtio-net header to the received datagram and post it on the RX
 * virtqueue (vq[0]).  If the device is not ready or no RX descriptor
 * is available, the datagram simply stays queued in the socket until
 * the next packet or RX kick. */
static void
vubr_backend_recv_cb(int sock, void *ctx)
{
    VubrDev *dev = (VubrDev *) ctx;
    VubrVirtq *rx_vq = &dev->vq[0];
    uint8_t buf[4096];
    struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf;
    int hdrlen = sizeof(struct virtio_net_hdr_v1);
    int buflen = sizeof(buf);
    int len;

    if (!dev->ready) {
        return;
    }

    DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");

    uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);

    /* If there is no available descriptors, just do nothing.
     * The buffer will be handled by next arrived UDP packet,
     * or next kick on receive virtq. */
    if (rx_vq->last_avail_index == avail_index) {
        DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
        return;
    }

    /* Receive the payload after the header slot we reserved. */
    len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen);

    /* Zeroed header; num_buffers = 1 since we never merge buffers. */
    *hdr = (struct virtio_net_hdr_v1) { };
    hdr->num_buffers = 1;
    vubr_post_buffer(dev, rx_vq, buf, len + hdrlen);
}
722 | ||
/* Dispatcher callback for the TX queue's kick eventfd: drain the
 * eventfd counter, then process the newly available TX descriptors
 * (dev->vq[1]; vq[0] is the RX queue). */
static void
vubr_kick_cb(int sock, void *ctx)
{
    VubrDev *dev = (VubrDev *) ctx;
    eventfd_t kick_data;
    ssize_t rc;

    rc = eventfd_read(sock, &kick_data);
    if (rc == -1) {
        vubr_die("eventfd_read()");
    } else {
        DPRINT("Got kick_data: %016"PRIx64"\n", kick_data);
        vubr_process_avail(dev, &dev->vq[1]);
    }
}
738 | ||
/* Placeholder handler for requests not implemented yet; logs the call
 * and returns 0 (no reply is sent). */
static int
vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("Function %s() not implemented yet.\n", __func__);
    return 0;
}
745 | ||
/* VHOST_USER_GET_FEATURES: advertise the device feature bits we offer.
 * Returns 1 so the caller sends the payload back to the master.
 * NOTE(review): MRG_RXBUF is advertised although the file-top TODO
 * says mergeable buffers are not supported yet -- confirm intent. */
static int
vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    vmsg->payload.u64 =
        ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
         (1ULL << VHOST_F_LOG_ALL) |
         (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
         (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));

    vmsg->size = sizeof(vmsg->payload.u64);

    DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);

    /* Reply */
    return 1;
}
762 | ||
/* VHOST_USER_SET_FEATURES: record the feature bits the master finally
 * negotiated; consulted later (e.g. VHOST_F_LOG_ALL in
 * vubr_log_write()). */
static int
vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
    dev->features = vmsg->payload.u64;
    return 0;
}
770 | ||
/* VHOST_USER_SET_OWNER: nothing to do for this bridge -- the master
 * merely claims the device. */
static int
vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    return 0;
}
776 | ||
5c93c473 VK |
777 | static void |
778 | vubr_close_log(VubrDev *dev) | |
779 | { | |
780 | if (dev->log_table) { | |
781 | if (munmap(dev->log_table, dev->log_size) != 0) { | |
782 | vubr_die("munmap()"); | |
783 | } | |
784 | ||
785 | dev->log_table = 0; | |
786 | } | |
787 | if (dev->log_call_fd != -1) { | |
788 | close(dev->log_call_fd); | |
789 | dev->log_call_fd = -1; | |
790 | } | |
791 | } | |
792 | ||
3595e2eb VK |
/* Handler for VHOST_USER_RESET_OWNER (see vubr_request_str): tear
 * down migration-log state and forget the negotiated features so the
 * device starts from scratch. */
static int
vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    vubr_close_log(dev);
    dev->ready = 0;
    dev->features = 0;
    return 0;
}
801 | ||
/* VHOST_USER_SET_MEM_TABLE: mmap each guest memory region from the
 * fds passed with the message and record the translation info used by
 * gpa_to_va()/qva_to_va().
 * NOTE(review): previously mapped regions are not munmap'ed when a
 * new table arrives, and the region fds are not closed -- confirm
 * whether the master only sends this once per session. */
static int
vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    int i;
    VhostUserMemory *memory = &vmsg->payload.memory;
    dev->nregions = memory->nregions;

    DPRINT("Nregions: %d\n", memory->nregions);
    for (i = 0; i < dev->nregions; i++) {
        void *mmap_addr;
        VhostUserMemoryRegion *msg_region = &memory->regions[i];
        VubrDevRegion *dev_region = &dev->regions[i];

        DPRINT("Region %d\n", i);
        DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
               msg_region->guest_phys_addr);
        DPRINT("    memory_size:     0x%016"PRIx64"\n",
               msg_region->memory_size);
        DPRINT("    userspace_addr   0x%016"PRIx64"\n",
               msg_region->userspace_addr);
        DPRINT("    mmap_offset      0x%016"PRIx64"\n",
               msg_region->mmap_offset);

        dev_region->gpa = msg_region->guest_phys_addr;
        dev_region->size = msg_region->memory_size;
        dev_region->qva = msg_region->userspace_addr;
        dev_region->mmap_offset = msg_region->mmap_offset;

        /* We don't use offset argument of mmap() since the
         * mapped address has to be page aligned, and we use huge
         * pages.  */
        mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
                         PROT_READ | PROT_WRITE, MAP_SHARED,
                         vmsg->fds[i], 0);

        if (mmap_addr == MAP_FAILED) {
            vubr_die("mmap");
        }

        dev_region->mmap_addr = (uint64_t) mmap_addr;
        DPRINT("    mmap_addr:       0x%016"PRIx64"\n", dev_region->mmap_addr);
    }

    return 0;
}
847 | ||
/* VHOST_USER_SET_LOG_BASE: mmap the dirty-page log shared with QEMU.
 * The log fd arrives as an ancillary fd; offset and size come in the
 * payload.  Returns 1: the LOG_SHMFD protocol feature requires an ack
 * reply.
 * NOTE(review): an already mapped log_table is not munmap'ed first,
 * and 'fd' is not closed after the mmap -- verify against the
 * protocol's expectations. */
static int
vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    int fd;
    uint64_t log_mmap_size, log_mmap_offset;
    void *rc;

    assert(vmsg->fd_num == 1);
    fd = vmsg->fds[0];

    assert(vmsg->size == sizeof(vmsg->payload.log));
    log_mmap_offset = vmsg->payload.log.mmap_offset;
    log_mmap_size = vmsg->payload.log.mmap_size;
    DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
    DPRINT("Log mmap_size:   %"PRId64"\n", log_mmap_size);

    rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
              log_mmap_offset);
    if (rc == MAP_FAILED) {
        vubr_die("mmap");
    }
    dev->log_table = rc;
    dev->log_size = log_mmap_size;

    vmsg->size = sizeof(vmsg->payload.u64);
    /* Reply */
    return 1;
}
876 | ||
/* VHOST_USER_SET_LOG_FD: store the eventfd QEMU wants us to signal
 * after each dirty-log update (see vubr_log_kick()).
 * NOTE(review): a previously installed log_call_fd is not closed. */
static int
vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    assert(vmsg->fd_num == 1);
    dev->log_call_fd = vmsg->fds[0];
    DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
    return 0;
}
885 | ||
886 | static int | |
887 | vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
888 | { | |
889 | unsigned int index = vmsg->payload.state.index; | |
890 | unsigned int num = vmsg->payload.state.num; | |
891 | ||
892 | DPRINT("State.index: %d\n", index); | |
893 | DPRINT("State.num: %d\n", num); | |
894 | dev->vq[index].size = num; | |
895 | return 0; | |
896 | } | |
897 | ||
/* VHOST_USER_SET_VRING_ADDR: translate the master's (QEMU userspace)
 * ring addresses into our own and remember the guest-physical used
 * ring address needed for dirty logging.  Also resynchronizes
 * last_used_index from the ring itself. */
static int
vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    struct vhost_vring_addr *vra = &vmsg->payload.addr;
    unsigned int index = vra->index;
    VubrVirtq *vq = &dev->vq[index];

    DPRINT("vhost_vring_addr:\n");
    DPRINT("    index:  %d\n", vra->index);
    DPRINT("    flags:  %d\n", vra->flags);
    DPRINT("    desc_user_addr:   0x%016llx\n", vra->desc_user_addr);
    DPRINT("    used_user_addr:   0x%016llx\n", vra->used_user_addr);
    DPRINT("    avail_user_addr:  0x%016llx\n", vra->avail_user_addr);
    DPRINT("    log_guest_addr:   0x%016llx\n", vra->log_guest_addr);

    vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
    vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
    vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);
    /* Guest-physical address of the used ring, for vubr_log_write(). */
    vq->log_guest_addr = vra->log_guest_addr;

    DPRINT("Setting virtq addresses:\n");
    DPRINT("    vring_desc  at %p\n", vq->desc);
    DPRINT("    vring_used  at %p\n", vq->used);
    DPRINT("    vring_avail at %p\n", vq->avail);

    /* Pick up wherever the ring already is (e.g. after migration). */
    vq->last_used_index = vq->used->idx;
    return 0;
}
926 | ||
927 | static int | |
928 | vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
929 | { | |
930 | unsigned int index = vmsg->payload.state.index; | |
931 | unsigned int num = vmsg->payload.state.num; | |
932 | ||
933 | DPRINT("State.index: %d\n", index); | |
934 | DPRINT("State.num: %d\n", num); | |
935 | dev->vq[index].last_avail_index = num; | |
936 | ||
937 | return 0; | |
938 | } | |
939 | ||
/* VHOST_USER_GET_VRING_BASE: report the current avail index back to
 * the master and stop queue processing.  Returns 1 to send a reply. */
static int
vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    unsigned int index = vmsg->payload.state.index;

    DPRINT("State.index: %d\n", index);
    vmsg->payload.state.num = dev->vq[index].last_avail_index;
    vmsg->size = sizeof(vmsg->payload.state);
    /* FIXME: this is a work-around for a bug in QEMU enabling
     * too early vrings. When protocol features are enabled,
     * we have to respect * VHOST_USER_SET_VRING_ENABLE request. */
    dev->ready = 0;

    /* Reply */
    return 1;
}
956 | ||
/* VHOST_USER_SET_VRING_KICK: take ownership of the kick eventfd for the
 * selected queue and, for TX queues, start listening on it. */
static int
vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    uint64_t u64_arg = vmsg->payload.u64;
    /* Low bits select the queue; NOFD flag would mean "no fd attached". */
    int index = u64_arg & VHOST_USER_VRING_IDX_MASK;

    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);

    /* Polling mode (NOFD) is not supported; exactly one fd must arrive
     * as ancillary data with this message. */
    assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
    assert(vmsg->fd_num == 1);

    dev->vq[index].kick_fd = vmsg->fds[0];
    DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);

    /* Odd queue numbers are TX queues in this bridge's layout. */
    if (index % 2 == 1) {
        /* TX queue. */
        dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd,
                       dev, vubr_kick_cb);

        DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
               dev->vq[index].kick_fd, index);
    }
    /* We temporarily use this hack to determine that both TX and RX
     * queues are set up and ready for processing.
     * FIXME: we need to rely in VHOST_USER_SET_VRING_ENABLE and
     * actual kicks. */
    if (dev->vq[0].kick_fd != -1 &&
        dev->vq[1].kick_fd != -1) {
        dev->ready = 1;
        DPRINT("vhost-user-bridge is ready for processing queues.\n");
    }
    return 0;
}
991 | ||
992 | static int | |
993 | vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
994 | { | |
995 | uint64_t u64_arg = vmsg->payload.u64; | |
996 | int index = u64_arg & VHOST_USER_VRING_IDX_MASK; | |
997 | ||
998 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
999 | assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0); | |
1000 | assert(vmsg->fd_num == 1); | |
1001 | ||
1002 | dev->vq[index].call_fd = vmsg->fds[0]; | |
1003 | DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index); | |
1004 | ||
1005 | return 0; | |
1006 | } | |
1007 | ||
1008 | static int | |
1009 | vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1010 | { | |
1011 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); | |
1012 | return 0; | |
1013 | } | |
1014 | ||
1015 | static int | |
1016 | vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1017 | { | |
5c93c473 | 1018 | vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD; |
3595e2eb | 1019 | DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); |
5c93c473 VK |
1020 | vmsg->size = sizeof(vmsg->payload.u64); |
1021 | ||
1022 | /* Reply */ | |
1023 | return 1; | |
3595e2eb VK |
1024 | } |
1025 | ||
/* VHOST_USER_SET_PROTOCOL_FEATURES: the master's acknowledged protocol
 * feature set is currently ignored — only logged. */
static int
vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    /* FIXME: unimplemented */
    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
    return 0;
}
1033 | ||
/* VHOST_USER_GET_QUEUE_NUM: should reply with the number of supported
 * queues; still a TODO (see file header) — only logs, sends no reply. */
static int
vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("Function %s() not implemented yet.\n", __func__);
    return 0;
}
1040 | ||
1041 | static int | |
1042 | vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1043 | { | |
5c93c473 VK |
1044 | unsigned int index = vmsg->payload.state.index; |
1045 | unsigned int enable = vmsg->payload.state.num; | |
1046 | ||
1047 | DPRINT("State.index: %d\n", index); | |
1048 | DPRINT("State.enable: %d\n", enable); | |
1049 | dev->vq[index].enable = enable; | |
3595e2eb VK |
1050 | return 0; |
1051 | } | |
1052 | ||
1053 | static int | |
1054 | vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg) | |
1055 | { | |
1056 | DPRINT("Function %s() not implemented yet.\n", __func__); | |
1057 | return 0; | |
1058 | } | |
1059 | ||
1060 | static int | |
1061 | vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg) | |
1062 | { | |
1063 | /* Print out generic part of the request. */ | |
1064 | DPRINT( | |
1065 | "================== Vhost user message from QEMU ==================\n"); | |
1066 | DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request], | |
1067 | vmsg->request); | |
1068 | DPRINT("Flags: 0x%x\n", vmsg->flags); | |
1069 | DPRINT("Size: %d\n", vmsg->size); | |
1070 | ||
1071 | if (vmsg->fd_num) { | |
1072 | int i; | |
1073 | DPRINT("Fds:"); | |
1074 | for (i = 0; i < vmsg->fd_num; i++) { | |
1075 | DPRINT(" %d", vmsg->fds[i]); | |
1076 | } | |
1077 | DPRINT("\n"); | |
1078 | } | |
1079 | ||
1080 | switch (vmsg->request) { | |
1081 | case VHOST_USER_NONE: | |
1082 | return vubr_none_exec(dev, vmsg); | |
1083 | case VHOST_USER_GET_FEATURES: | |
1084 | return vubr_get_features_exec(dev, vmsg); | |
1085 | case VHOST_USER_SET_FEATURES: | |
1086 | return vubr_set_features_exec(dev, vmsg); | |
1087 | case VHOST_USER_SET_OWNER: | |
1088 | return vubr_set_owner_exec(dev, vmsg); | |
60915dc4 | 1089 | case VHOST_USER_RESET_OWNER: |
3595e2eb VK |
1090 | return vubr_reset_device_exec(dev, vmsg); |
1091 | case VHOST_USER_SET_MEM_TABLE: | |
1092 | return vubr_set_mem_table_exec(dev, vmsg); | |
1093 | case VHOST_USER_SET_LOG_BASE: | |
1094 | return vubr_set_log_base_exec(dev, vmsg); | |
1095 | case VHOST_USER_SET_LOG_FD: | |
1096 | return vubr_set_log_fd_exec(dev, vmsg); | |
1097 | case VHOST_USER_SET_VRING_NUM: | |
1098 | return vubr_set_vring_num_exec(dev, vmsg); | |
1099 | case VHOST_USER_SET_VRING_ADDR: | |
1100 | return vubr_set_vring_addr_exec(dev, vmsg); | |
1101 | case VHOST_USER_SET_VRING_BASE: | |
1102 | return vubr_set_vring_base_exec(dev, vmsg); | |
1103 | case VHOST_USER_GET_VRING_BASE: | |
1104 | return vubr_get_vring_base_exec(dev, vmsg); | |
1105 | case VHOST_USER_SET_VRING_KICK: | |
1106 | return vubr_set_vring_kick_exec(dev, vmsg); | |
1107 | case VHOST_USER_SET_VRING_CALL: | |
1108 | return vubr_set_vring_call_exec(dev, vmsg); | |
1109 | case VHOST_USER_SET_VRING_ERR: | |
1110 | return vubr_set_vring_err_exec(dev, vmsg); | |
1111 | case VHOST_USER_GET_PROTOCOL_FEATURES: | |
1112 | return vubr_get_protocol_features_exec(dev, vmsg); | |
1113 | case VHOST_USER_SET_PROTOCOL_FEATURES: | |
1114 | return vubr_set_protocol_features_exec(dev, vmsg); | |
1115 | case VHOST_USER_GET_QUEUE_NUM: | |
1116 | return vubr_get_queue_num_exec(dev, vmsg); | |
1117 | case VHOST_USER_SET_VRING_ENABLE: | |
1118 | return vubr_set_vring_enable_exec(dev, vmsg); | |
1119 | case VHOST_USER_SEND_RARP: | |
1120 | return vubr_send_rarp_exec(dev, vmsg); | |
1121 | ||
1122 | case VHOST_USER_MAX: | |
1123 | assert(vmsg->request != VHOST_USER_MAX); | |
1124 | } | |
1125 | return 0; | |
1126 | } | |
1127 | ||
1128 | static void | |
1129 | vubr_receive_cb(int sock, void *ctx) | |
1130 | { | |
1131 | VubrDev *dev = (VubrDev *) ctx; | |
1132 | VhostUserMsg vmsg; | |
1133 | int reply_requested; | |
1134 | ||
1135 | vubr_message_read(sock, &vmsg); | |
1136 | reply_requested = vubr_execute_request(dev, &vmsg); | |
1137 | if (reply_requested) { | |
1138 | /* Set the version in the flags when sending the reply */ | |
1139 | vmsg.flags &= ~VHOST_USER_VERSION_MASK; | |
1140 | vmsg.flags |= VHOST_USER_VERSION; | |
1141 | vmsg.flags |= VHOST_USER_REPLY_MASK; | |
1142 | vubr_message_write(sock, &vmsg); | |
1143 | } | |
1144 | } | |
1145 | ||
1146 | static void | |
1147 | vubr_accept_cb(int sock, void *ctx) | |
1148 | { | |
1149 | VubrDev *dev = (VubrDev *)ctx; | |
1150 | int conn_fd; | |
1151 | struct sockaddr_un un; | |
1152 | socklen_t len = sizeof(un); | |
1153 | ||
1154 | conn_fd = accept(sock, (struct sockaddr *) &un, &len); | |
5c93c473 | 1155 | if (conn_fd == -1) { |
3595e2eb VK |
1156 | vubr_die("accept()"); |
1157 | } | |
1158 | DPRINT("Got connection from remote peer on sock %d\n", conn_fd); | |
1159 | dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb); | |
1160 | } | |
1161 | ||
1162 | static VubrDev * | |
1163 | vubr_new(const char *path) | |
1164 | { | |
1165 | VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev)); | |
1166 | dev->nregions = 0; | |
1167 | int i; | |
1168 | struct sockaddr_un un; | |
1169 | size_t len; | |
1170 | ||
1171 | for (i = 0; i < MAX_NR_VIRTQUEUE; i++) { | |
1172 | dev->vq[i] = (VubrVirtq) { | |
1173 | .call_fd = -1, .kick_fd = -1, | |
1174 | .size = 0, | |
1175 | .last_avail_index = 0, .last_used_index = 0, | |
1176 | .desc = 0, .avail = 0, .used = 0, | |
5c93c473 | 1177 | .enable = 0, |
3595e2eb VK |
1178 | }; |
1179 | } | |
1180 | ||
5c93c473 VK |
1181 | /* Init log */ |
1182 | dev->log_call_fd = -1; | |
1183 | dev->log_size = 0; | |
1184 | dev->log_table = 0; | |
1185 | dev->ready = 0; | |
1186 | dev->features = 0; | |
1187 | ||
3595e2eb VK |
1188 | /* Get a UNIX socket. */ |
1189 | dev->sock = socket(AF_UNIX, SOCK_STREAM, 0); | |
1190 | if (dev->sock == -1) { | |
1191 | vubr_die("socket"); | |
1192 | } | |
1193 | ||
1194 | un.sun_family = AF_UNIX; | |
1195 | strcpy(un.sun_path, path); | |
1196 | len = sizeof(un.sun_family) + strlen(path); | |
1197 | unlink(path); | |
1198 | ||
1199 | if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) { | |
1200 | vubr_die("bind"); | |
1201 | } | |
1202 | ||
1203 | if (listen(dev->sock, 1) == -1) { | |
1204 | vubr_die("listen"); | |
1205 | } | |
1206 | ||
1207 | dispatcher_init(&dev->dispatcher); | |
1208 | dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, | |
1209 | vubr_accept_cb); | |
1210 | ||
1211 | DPRINT("Waiting for connections on UNIX socket %s ...\n", path); | |
1212 | return dev; | |
1213 | } | |
1214 | ||
7cf32491 VK |
1215 | static void |
1216 | vubr_set_host(struct sockaddr_in *saddr, const char *host) | |
1217 | { | |
1218 | if (isdigit(host[0])) { | |
1219 | if (!inet_aton(host, &saddr->sin_addr)) { | |
1220 | fprintf(stderr, "inet_aton() failed.\n"); | |
1221 | exit(1); | |
1222 | } | |
1223 | } else { | |
1224 | struct hostent *he = gethostbyname(host); | |
1225 | ||
1226 | if (!he) { | |
1227 | fprintf(stderr, "gethostbyname() failed.\n"); | |
1228 | exit(1); | |
1229 | } | |
1230 | saddr->sin_addr = *(struct in_addr *)he->h_addr; | |
1231 | } | |
1232 | } | |
1233 | ||
3595e2eb VK |
1234 | static void |
1235 | vubr_backend_udp_setup(VubrDev *dev, | |
1236 | const char *local_host, | |
7cf32491 VK |
1237 | const char *local_port, |
1238 | const char *remote_host, | |
1239 | const char *remote_port) | |
3595e2eb VK |
1240 | { |
1241 | int sock; | |
7cf32491 VK |
1242 | const char *r; |
1243 | ||
1244 | int lport, rport; | |
1245 | ||
1246 | lport = strtol(local_port, (char **)&r, 0); | |
1247 | if (r == local_port) { | |
1248 | fprintf(stderr, "lport parsing failed.\n"); | |
1249 | exit(1); | |
1250 | } | |
1251 | ||
1252 | rport = strtol(remote_port, (char **)&r, 0); | |
1253 | if (r == remote_port) { | |
1254 | fprintf(stderr, "rport parsing failed.\n"); | |
1255 | exit(1); | |
1256 | } | |
1257 | ||
3595e2eb VK |
1258 | struct sockaddr_in si_local = { |
1259 | .sin_family = AF_INET, | |
7cf32491 | 1260 | .sin_port = htons(lport), |
3595e2eb VK |
1261 | }; |
1262 | ||
7cf32491 | 1263 | vubr_set_host(&si_local, local_host); |
3595e2eb VK |
1264 | |
1265 | /* setup destination for sends */ | |
1266 | dev->backend_udp_dest = (struct sockaddr_in) { | |
1267 | .sin_family = AF_INET, | |
7cf32491 | 1268 | .sin_port = htons(rport), |
3595e2eb | 1269 | }; |
7cf32491 | 1270 | vubr_set_host(&dev->backend_udp_dest, remote_host); |
3595e2eb VK |
1271 | |
1272 | sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); | |
1273 | if (sock == -1) { | |
1274 | vubr_die("socket"); | |
1275 | } | |
1276 | ||
1277 | if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) { | |
1278 | vubr_die("bind"); | |
1279 | } | |
1280 | ||
1281 | dev->backend_udp_sock = sock; | |
1282 | dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb); | |
1283 | DPRINT("Waiting for data from udp backend on %s:%d...\n", | |
7cf32491 | 1284 | local_host, lport); |
3595e2eb VK |
1285 | } |
1286 | ||
1287 | static void | |
1288 | vubr_run(VubrDev *dev) | |
1289 | { | |
1290 | while (1) { | |
1291 | /* timeout 200ms */ | |
1292 | dispatcher_wait(&dev->dispatcher, 200000); | |
1293 | /* Here one can try polling strategy. */ | |
1294 | } | |
1295 | } | |
1296 | ||
7cf32491 VK |
/* Split "host:port" into two newly allocated strings (ownership passes
 * to the caller). Returns 0 on success, -1 if there is no ':' or an
 * allocation fails. Unlike the original, the const input buffer is NOT
 * mutated (the old code wrote '\0' through a const-qualified pointer),
 * and allocation results are checked. */
static int
vubr_parse_host_port(const char **host, const char **port, const char *buf)
{
    const char *colon = strchr(buf, ':');
    size_t hlen, plen;
    char *h, *p;

    if (!colon) {
        return -1;
    }

    hlen = (size_t)(colon - buf);
    plen = strlen(colon + 1);

    h = malloc(hlen + 1);
    p = malloc(plen + 1);
    if (!h || !p) {
        free(h);
        free(p);
        return -1;
    }

    memcpy(h, buf, hlen);
    h[hlen] = '\0';
    memcpy(p, colon + 1, plen + 1);   /* includes the terminating NUL */

    *host = h;
    *port = p;
    return 0;
}
1310 | ||
/* Compile-time defaults for the bridge's endpoints; each can be
 * overridden by a command-line option (-u, -l, -r — see main()). */
#define DEFAULT_UD_SOCKET "/tmp/vubr.sock"
#define DEFAULT_LHOST "127.0.0.1"
#define DEFAULT_LPORT "4444"
#define DEFAULT_RHOST "127.0.0.1"
#define DEFAULT_RPORT "5555"

/* Effective settings, possibly replaced in main() by strdup()'d copies
 * of getopt arguments. */
static const char *ud_socket_path = DEFAULT_UD_SOCKET;
static const char *lhost = DEFAULT_LHOST;
static const char *lport = DEFAULT_LPORT;
static const char *rhost = DEFAULT_RHOST;
static const char *rport = DEFAULT_RPORT;
3595e2eb VK |
1323 | int |
1324 | main(int argc, char *argv[]) | |
1325 | { | |
1326 | VubrDev *dev; | |
7cf32491 | 1327 | int opt; |
3595e2eb | 1328 | |
7cf32491 VK |
1329 | while ((opt = getopt(argc, argv, "l:r:u:")) != -1) { |
1330 | ||
1331 | switch (opt) { | |
1332 | case 'l': | |
1333 | if (vubr_parse_host_port(&lhost, &lport, optarg) < 0) { | |
1334 | goto out; | |
1335 | } | |
1336 | break; | |
1337 | case 'r': | |
1338 | if (vubr_parse_host_port(&rhost, &rport, optarg) < 0) { | |
1339 | goto out; | |
1340 | } | |
1341 | break; | |
1342 | case 'u': | |
1343 | ud_socket_path = strdup(optarg); | |
1344 | break; | |
1345 | default: | |
1346 | goto out; | |
1347 | } | |
1348 | } | |
1349 | ||
1350 | DPRINT("ud socket: %s\n", ud_socket_path); | |
1351 | DPRINT("local: %s:%s\n", lhost, lport); | |
1352 | DPRINT("remote: %s:%s\n", rhost, rport); | |
1353 | ||
1354 | dev = vubr_new(ud_socket_path); | |
3595e2eb VK |
1355 | if (!dev) { |
1356 | return 1; | |
1357 | } | |
1358 | ||
7cf32491 | 1359 | vubr_backend_udp_setup(dev, lhost, lport, rhost, rport); |
3595e2eb VK |
1360 | vubr_run(dev); |
1361 | return 0; | |
7cf32491 VK |
1362 | |
1363 | out: | |
1364 | fprintf(stderr, "Usage: %s ", argv[0]); | |
1365 | fprintf(stderr, "[-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n"); | |
1366 | fprintf(stderr, "\t-u path to unix doman socket. default: %s\n", | |
1367 | DEFAULT_UD_SOCKET); | |
1368 | fprintf(stderr, "\t-l local host and port. default: %s:%s\n", | |
1369 | DEFAULT_LHOST, DEFAULT_LPORT); | |
1370 | fprintf(stderr, "\t-r remote host and port. default: %s:%s\n", | |
1371 | DEFAULT_RHOST, DEFAULT_RPORT); | |
1372 | ||
1373 | return 1; | |
3595e2eb | 1374 | } |