/*
 * Vhost User Bridge
 *
 * Copyright (c) 2015 Red Hat, Inc.
 *
 * Authors:
 *  Victor Kaplansky <victork@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */

/*
 * TODO:
 * - main should get parameters from the command line.
 * - implement all request handlers.
 * - test for broken requests and virtqueue.
 * - implement features defined by Virtio 1.0 spec.
 * - support mergeable buffers and indirect descriptors.
 * - implement RESET_DEVICE request.
 * - implement clean shutdown.
 * - implement non-blocking writes to UDP backend.
 * - implement polling strategy.
 */
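
/*
 * Usage sketch (the exact flag syntax varies between QEMU versions; see
 * docs/specs/vhost-user.txt in the QEMU tree for the protocol). Start
 * the bridge, then give QEMU a shareable memory backend -- the fds QEMU
 * sends for guest memory are mmap()ed below, so the memory must be
 * shared:
 *
 *     ./vhost-user-bridge
 *     qemu-system-x86_64 ... \
 *         -object memory-backend-file,id=mem,size=512M,mem-path=/dev/hugepages,share=on \
 *         -numa node,memdev=mem \
 *         -chardev socket,id=char0,path=/tmp/vubr.sock \
 *         -netdev type=vhost-user,id=mynet1,chardev=char0 \
 *         -device virtio-net-pci,netdev=mynet1
 */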

#include <stddef.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/mman.h>
#include <sys/eventfd.h>
#include <arpa/inet.h>

#include <linux/vhost.h>

#include "qemu/atomic.h"
#include "standard-headers/linux/virtio_net.h"
#include "standard-headers/linux/virtio_ring.h"

#define VHOST_USER_BRIDGE_DEBUG 1

#define DPRINT(...) \
    do { \
        if (VHOST_USER_BRIDGE_DEBUG) { \
            printf(__VA_ARGS__); \
        } \
    } while (0)

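/*
 * A minimal select()-based event loop: every watched fd maps directly
 * (indexed by fd number, hence the FD_SETSIZE bound checked in
 * dispatcher_add() below) to a callback plus an opaque context pointer.
 */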
typedef void (*CallbackFunc)(int sock, void *ctx);

typedef struct Event {
    void *ctx;
    CallbackFunc callback;
} Event;

typedef struct Dispatcher {
    int max_sock;
    fd_set fdset;
    Event events[FD_SETSIZE];
} Dispatcher;

static void
vubr_die(const char *s)
{
    perror(s);
    exit(1);
}

static int
dispatcher_init(Dispatcher *dispr)
{
    FD_ZERO(&dispr->fdset);
    dispr->max_sock = -1;
    return 0;
}

static int
dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
{
    if (sock >= FD_SETSIZE) {
        fprintf(stderr,
                "Error: Failed to add new event. sock %d should be less than %d\n",
                sock, FD_SETSIZE);
        return -1;
    }

    dispr->events[sock].ctx = ctx;
    dispr->events[sock].callback = cb;

    FD_SET(sock, &dispr->fdset);
    if (sock > dispr->max_sock) {
        dispr->max_sock = sock;
    }
    DPRINT("Added sock %d for watching. max_sock: %d\n",
           sock, dispr->max_sock);
    return 0;
}

#if 0
/* dispatcher_remove() is not currently in use but may be useful
 * in the future. */
static int
dispatcher_remove(Dispatcher *dispr, int sock)
{
    if (sock >= FD_SETSIZE) {
        fprintf(stderr,
                "Error: Failed to remove event. sock %d should be less than %d\n",
                sock, FD_SETSIZE);
        return -1;
    }

    FD_CLR(sock, &dispr->fdset);
    return 0;
}
#endif

/* timeout in us */
static int
dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
{
    struct timeval tv;
    tv.tv_sec = timeout / 1000000;
    tv.tv_usec = timeout % 1000000;

    fd_set fdset = dispr->fdset;

    /* Wait until some of the sockets become readable. */
    int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv);

    if (rc == -1) {
        vubr_die("select");
    }

    /* Timeout */
    if (rc == 0) {
        return 0;
    }

    /* Now call the callback for every ready socket. */

    int sock;
    for (sock = 0; sock < dispr->max_sock + 1; sock++) {
        if (FD_ISSET(sock, &fdset)) {
            Event *e = &dispr->events[sock];
            e->callback(sock, e->ctx);
        }
    }

    return 0;
}

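/*
 * Per-virtqueue state. last_avail_index and last_used_index are our
 * private record of how far we have consumed the avail ring and filled
 * the used ring; the ring indices shared with the guest are only
 * accessed through the atomic_mb_* helpers used further down.
 */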
typedef struct VubrVirtq {
    int call_fd;
    int kick_fd;
    uint32_t size;
    uint16_t last_avail_index;
    uint16_t last_used_index;
    struct vring_desc *desc;
    struct vring_avail *avail;
    struct vring_used *used;
} VubrVirtq;

/* Based on qemu/hw/virtio/vhost-user.c */

#define VHOST_MEMORY_MAX_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30

enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,

    VHOST_USER_PROTOCOL_F_MAX
};

#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)

typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_DEVICE = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_MAX
} VhostUserRequest;

typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;

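/*
 * Wire format: a fixed header (request, flags and size, i.e. the
 * VHOST_USER_HDR_SIZE bytes up to and including the size field)
 * followed by "size" bytes of payload. The fds/fd_num members are not
 * part of the wire format: file descriptors travel out of band as
 * SCM_RIGHTS ancillary data on the UNIX socket.
 */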
typedef struct VhostUserMsg {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1 << 2)
    uint32_t flags;
    uint32_t size; /* the following payload size */
    union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
    } payload;
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    int fd_num;
} QEMU_PACKED VhostUserMsg;

#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)

/* The version of the protocol we support */
#define VHOST_USER_VERSION (0x1)

#define MAX_NR_VIRTQUEUE (8)

typedef struct VubrDevRegion {
    /* Guest Physical address. */
    uint64_t gpa;
    /* Memory region size. */
    uint64_t size;
    /* QEMU virtual address (userspace). */
    uint64_t qva;
    /* Starting offset in our mmaped space. */
    uint64_t mmap_offset;
    /* Start address of mmaped space. */
    uint64_t mmap_addr;
} VubrDevRegion;

typedef struct VubrDev {
    int sock;
    Dispatcher dispatcher;
    uint32_t nregions;
    VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
    VubrVirtq vq[MAX_NR_VIRTQUEUE];
    int backend_udp_sock;
    struct sockaddr_in backend_udp_dest;
} VubrDev;

static const char *vubr_request_str[] = {
    [VHOST_USER_NONE]                  = "VHOST_USER_NONE",
    [VHOST_USER_GET_FEATURES]          = "VHOST_USER_GET_FEATURES",
    [VHOST_USER_SET_FEATURES]          = "VHOST_USER_SET_FEATURES",
    [VHOST_USER_SET_OWNER]             = "VHOST_USER_SET_OWNER",
    [VHOST_USER_RESET_DEVICE]          = "VHOST_USER_RESET_DEVICE",
    [VHOST_USER_SET_MEM_TABLE]         = "VHOST_USER_SET_MEM_TABLE",
    [VHOST_USER_SET_LOG_BASE]          = "VHOST_USER_SET_LOG_BASE",
    [VHOST_USER_SET_LOG_FD]            = "VHOST_USER_SET_LOG_FD",
    [VHOST_USER_SET_VRING_NUM]         = "VHOST_USER_SET_VRING_NUM",
    [VHOST_USER_SET_VRING_ADDR]        = "VHOST_USER_SET_VRING_ADDR",
    [VHOST_USER_SET_VRING_BASE]        = "VHOST_USER_SET_VRING_BASE",
    [VHOST_USER_GET_VRING_BASE]        = "VHOST_USER_GET_VRING_BASE",
    [VHOST_USER_SET_VRING_KICK]        = "VHOST_USER_SET_VRING_KICK",
    [VHOST_USER_SET_VRING_CALL]        = "VHOST_USER_SET_VRING_CALL",
    [VHOST_USER_SET_VRING_ERR]         = "VHOST_USER_SET_VRING_ERR",
    [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
    [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
    [VHOST_USER_GET_QUEUE_NUM]         = "VHOST_USER_GET_QUEUE_NUM",
    [VHOST_USER_SET_VRING_ENABLE]      = "VHOST_USER_SET_VRING_ENABLE",
    [VHOST_USER_SEND_RARP]             = "VHOST_USER_SEND_RARP",
    [VHOST_USER_MAX]                   = "VHOST_USER_MAX",
};

static void
print_buffer(uint8_t *buf, size_t len)
{
    size_t i;
    printf("Raw buffer:\n");
    for (i = 0; i < len; i++) {
        if (i % 16 == 0) {
            printf("\n");
        }
        if (i % 4 == 0) {
            printf(" ");
        }
        printf("%02x ", buf[i]);
    }
    printf("\n............................................................\n");
}

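/*
 * Two address spaces reach us over the protocol: vring descriptors
 * carry guest physical addresses, while SET_VRING_ADDR carries QEMU
 * virtual (userspace) addresses. The two helpers below translate each
 * of them into pointers inside our own mmap()ed view of guest memory.
 */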
/* Translate guest physical address to our virtual address. */
static uint64_t
gpa_to_va(VubrDev *dev, uint64_t guest_addr)
{
    int i;

    /* Find matching memory region. */
    for (i = 0; i < dev->nregions; i++) {
        VubrDevRegion *r = &dev->regions[i];

        if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
            return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
        }
    }

    assert(!"address not found in regions");
    return 0;
}

/* Translate qemu virtual address to our virtual address. */
static uint64_t
qva_to_va(VubrDev *dev, uint64_t qemu_addr)
{
    int i;

    /* Find matching memory region. */
    for (i = 0; i < dev->nregions; i++) {
        VubrDevRegion *r = &dev->regions[i];

        if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
            return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
        }
    }

    assert(!"address not found in regions");
    return 0;
}

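/*
 * Read one vhost-user message: first the fixed header, collecting any
 * file descriptors passed as SCM_RIGHTS ancillary data along with it,
 * then the payload, whose length the header announces.
 */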
static void
vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
{
    char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
    struct iovec iov = {
        .iov_base = (char *)vmsg,
        .iov_len = VHOST_USER_HDR_SIZE,
    };
    struct msghdr msg = {
        .msg_iov = &iov,
        .msg_iovlen = 1,
        .msg_control = control,
        .msg_controllen = sizeof(control),
    };
    size_t fd_size;
    struct cmsghdr *cmsg;
    int rc;

    rc = recvmsg(conn_fd, &msg, 0);

    if (rc <= 0) {
        vubr_die("recvmsg");
    }

    vmsg->fd_num = 0;
    for (cmsg = CMSG_FIRSTHDR(&msg);
         cmsg != NULL;
         cmsg = CMSG_NXTHDR(&msg, cmsg))
    {
        if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
            fd_size = cmsg->cmsg_len - CMSG_LEN(0);
            vmsg->fd_num = fd_size / sizeof(int);
            memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
            break;
        }
    }

    if (vmsg->size > sizeof(vmsg->payload)) {
        fprintf(stderr,
                "Error: too big message request: %d, size: vmsg->size: %u, "
                "while sizeof(vmsg->payload) = %zu\n",
                vmsg->request, vmsg->size, sizeof(vmsg->payload));
        exit(1);
    }

    if (vmsg->size) {
        rc = read(conn_fd, &vmsg->payload, vmsg->size);
        if (rc <= 0) {
            vubr_die("read");
        }

        assert(rc == vmsg->size);
    }
}

static void
vubr_message_write(int conn_fd, VhostUserMsg *vmsg)
{
    int rc;

    do {
        rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size);
    } while (rc < 0 && errno == EINTR);

    if (rc < 0) {
        vubr_die("write");
    }
}

static void
vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len)
{
    socklen_t slen = sizeof(struct sockaddr_in);

    if (sendto(dev->backend_udp_sock, buf, len, 0,
               (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) {
        vubr_die("sendto()");
    }
}

static int
vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen)
{
    socklen_t slen = sizeof(struct sockaddr_in);
    int rc;

    rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0,
                  (struct sockaddr *) &dev->backend_udp_dest,
                  &slen);
    if (rc == -1) {
        vubr_die("recvfrom()");
    }

    return rc;
}

static void
vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
{
    int hdrlen = sizeof(struct virtio_net_hdr_v1);

    if (VHOST_USER_BRIDGE_DEBUG) {
        print_buffer(buf, len);
    }
    vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
}

/* Kick the guest if necessary. */
static void
vubr_virtqueue_kick(VubrVirtq *vq)
{
    if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
        DPRINT("Kicking the guest...\n");
        eventfd_write(vq->call_fd, 1);
    }
}

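/*
 * RX path: copy one packet from the backend into the next available
 * (guest-writable) descriptor of the receive virtq, publish it in the
 * used ring and kick the guest. The caller must already have checked
 * that a descriptor is available.
 */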
static void
vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
{
    struct vring_desc *desc = vq->desc;
    struct vring_avail *avail = vq->avail;
    struct vring_used *used = vq->used;

    unsigned int size = vq->size;

    uint16_t avail_index = atomic_mb_read(&avail->idx);

    /* We check the available descriptors before posting the
     * buffer, so here we assume that there are enough available
     * descriptors. */
    assert(vq->last_avail_index != avail_index);
    uint16_t a_index = vq->last_avail_index % size;
    uint16_t u_index = vq->last_used_index % size;
    uint16_t d_index = avail->ring[a_index];

    int i = d_index;

    DPRINT("Post packet to guest on vq:\n");
    DPRINT("    size             = %d\n", vq->size);
    DPRINT("    last_avail_index = %d\n", vq->last_avail_index);
    DPRINT("    last_used_index  = %d\n", vq->last_used_index);
    DPRINT("    a_index          = %d\n", a_index);
    DPRINT("    u_index          = %d\n", u_index);
    DPRINT("    d_index          = %d\n", d_index);
    DPRINT("    desc[%d].addr    = 0x%016"PRIx64"\n", i, desc[i].addr);
    DPRINT("    desc[%d].len     = %d\n", i, desc[i].len);
    DPRINT("    desc[%d].flags   = %d\n", i, desc[i].flags);
    DPRINT("    avail->idx       = %d\n", avail_index);
    DPRINT("    used->idx        = %d\n", used->idx);

    if (!(desc[i].flags & VRING_DESC_F_WRITE)) {
        /* FIXME: we should find a writable descriptor. */
        fprintf(stderr, "Error: descriptor is not writable. Exiting.\n");
        exit(1);
    }

    void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
    uint32_t chunk_len = desc[i].len;

    if (len <= chunk_len) {
        memcpy(chunk_start, buf, len);
    } else {
        fprintf(stderr,
                "Received too long packet from the backend. Dropping...\n");
        return;
    }

    /* Add the descriptor to the used ring. */
    used->ring[u_index].id = d_index;
    used->ring[u_index].len = len;

    vq->last_avail_index++;
    vq->last_used_index++;

    atomic_mb_set(&used->idx, vq->last_used_index);

    /* Kick the guest if necessary. */
    vubr_virtqueue_kick(vq);
}

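/*
 * TX path: gather one descriptor chain (following VRING_DESC_F_NEXT
 * links) from the transmit virtq into a linear buffer, mark the chain
 * used and hand the packet to the UDP backend.
 */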
static int
vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
{
    struct vring_desc *desc = vq->desc;
    struct vring_avail *avail = vq->avail;
    struct vring_used *used = vq->used;

    unsigned int size = vq->size;

    uint16_t a_index = vq->last_avail_index % size;
    uint16_t u_index = vq->last_used_index % size;
    uint16_t d_index = avail->ring[a_index];

    uint32_t i, len = 0;
    size_t buf_size = 4096;
    uint8_t buf[4096];

    DPRINT("Chunks: ");
    i = d_index;
    do {
        void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
        uint32_t chunk_len = desc[i].len;

        if (len + chunk_len < buf_size) {
            memcpy(buf + len, chunk_start, chunk_len);
            DPRINT("%d ", chunk_len);
        } else {
            fprintf(stderr, "Error: too long packet. Dropping...\n");
            break;
        }

        len += chunk_len;

        if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
            break;
        }

        i = desc[i].next;
    } while (1);
    DPRINT("\n");

    if (!len) {
        return -1;
    }

    /* Add descriptor to the used ring. */
    used->ring[u_index].id = d_index;
    used->ring[u_index].len = len;

    vubr_consume_raw_packet(dev, buf, len);

    return 0;
}

static void
vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
{
    struct vring_avail *avail = vq->avail;
    struct vring_used *used = vq->used;

    while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
        vubr_process_desc(dev, vq);
        vq->last_avail_index++;
        vq->last_used_index++;
    }

    atomic_mb_set(&used->idx, vq->last_used_index);
}

static void
vubr_backend_recv_cb(int sock, void *ctx)
{
    VubrDev *dev = (VubrDev *) ctx;
    VubrVirtq *rx_vq = &dev->vq[0];
    uint8_t buf[4096];
    struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf;
    int hdrlen = sizeof(struct virtio_net_hdr_v1);
    int buflen = sizeof(buf);
    int len;

    DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK   ***\n\n");

    uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);

    /* If there are no available descriptors, just do nothing.
     * The datagram stays queued on the socket, so it will be
     * picked up again on a later dispatcher iteration, once the
     * guest has posted receive buffers. */
    if (rx_vq->last_avail_index == avail_index) {
        DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
        return;
    }

    len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen);

    *hdr = (struct virtio_net_hdr_v1) { };
    hdr->num_buffers = 1;
    vubr_post_buffer(dev, rx_vq, buf, len + hdrlen);
}

static void
vubr_kick_cb(int sock, void *ctx)
{
    VubrDev *dev = (VubrDev *) ctx;
    eventfd_t kick_data;
    ssize_t rc;

    rc = eventfd_read(sock, &kick_data);
    if (rc == -1) {
        vubr_die("eventfd_read()");
    } else {
        DPRINT("Got kick_data: %016"PRIx64"\n", kick_data);
        vubr_process_avail(dev, &dev->vq[1]);
    }
}

static int
vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("Function %s() not implemented yet.\n", __func__);
    return 0;
}

static int
vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    vmsg->payload.u64 =
        ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
         (1ULL << VIRTIO_NET_F_CTRL_VQ) |
         (1ULL << VIRTIO_NET_F_CTRL_RX) |
         (1ULL << VHOST_F_LOG_ALL));
    vmsg->size = sizeof(vmsg->payload.u64);

    DPRINT("Sending back to QEMU u64: 0x%016"PRIx64"\n", vmsg->payload.u64);

    /* Reply is requested. */
    return 1;
}

static int
vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
    return 0;
}

static int
vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    return 0;
}

static int
vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("Function %s() not implemented yet.\n", __func__);
    return 0;
}

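/*
 * SET_MEM_TABLE: QEMU passes one fd per guest memory region, together
 * with the region's guest-physical base, size, QEMU userspace address
 * and an offset into the fd. Each region is mmap()ed here to build the
 * translation tables that gpa_to_va()/qva_to_va() above rely on.
 */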
static int
vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    int i;
    VhostUserMemory *memory = &vmsg->payload.memory;
    dev->nregions = memory->nregions;

    DPRINT("Nregions: %d\n", memory->nregions);
    for (i = 0; i < dev->nregions; i++) {
        void *mmap_addr;
        VhostUserMemoryRegion *msg_region = &memory->regions[i];
        VubrDevRegion *dev_region = &dev->regions[i];

        DPRINT("Region %d\n", i);
        DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
               msg_region->guest_phys_addr);
        DPRINT("    memory_size:     0x%016"PRIx64"\n",
               msg_region->memory_size);
        DPRINT("    userspace_addr   0x%016"PRIx64"\n",
               msg_region->userspace_addr);
        DPRINT("    mmap_offset      0x%016"PRIx64"\n",
               msg_region->mmap_offset);

        dev_region->gpa = msg_region->guest_phys_addr;
        dev_region->size = msg_region->memory_size;
        dev_region->qva = msg_region->userspace_addr;
        dev_region->mmap_offset = msg_region->mmap_offset;

        /* We don't use the offset argument of mmap() since the
         * mapped offset has to be page aligned, and we use huge
         * pages. Instead we map from offset 0 and add mmap_offset
         * during address translation. */
        mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
                         PROT_READ | PROT_WRITE, MAP_SHARED,
                         vmsg->fds[i], 0);

        if (mmap_addr == MAP_FAILED) {
            vubr_die("mmap");
        }

        dev_region->mmap_addr = (uint64_t) mmap_addr;
        DPRINT("    mmap_addr:       0x%016"PRIx64"\n", dev_region->mmap_addr);
    }

    return 0;
}

static int
vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("Function %s() not implemented yet.\n", __func__);
    return 0;
}

static int
vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("Function %s() not implemented yet.\n", __func__);
    return 0;
}

static int
vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    unsigned int index = vmsg->payload.state.index;
    unsigned int num = vmsg->payload.state.num;

    DPRINT("State.index: %d\n", index);
    DPRINT("State.num:   %d\n", num);
    dev->vq[index].size = num;
    return 0;
}

static int
vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    struct vhost_vring_addr *vra = &vmsg->payload.addr;
    unsigned int index = vra->index;
    VubrVirtq *vq = &dev->vq[index];

    DPRINT("vhost_vring_addr:\n");
    DPRINT("    index:           %d\n", vra->index);
    DPRINT("    flags:           %d\n", vra->flags);
    DPRINT("    desc_user_addr:  0x%016llx\n", vra->desc_user_addr);
    DPRINT("    used_user_addr:  0x%016llx\n", vra->used_user_addr);
    DPRINT("    avail_user_addr: 0x%016llx\n", vra->avail_user_addr);
    DPRINT("    log_guest_addr:  0x%016llx\n", vra->log_guest_addr);

    vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
    vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
    vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);

    DPRINT("Setting virtq addresses:\n");
    DPRINT("    vring_desc  at %p\n", vq->desc);
    DPRINT("    vring_used  at %p\n", vq->used);
    DPRINT("    vring_avail at %p\n", vq->avail);

    vq->last_used_index = vq->used->idx;
    return 0;
}

static int
vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    unsigned int index = vmsg->payload.state.index;
    unsigned int num = vmsg->payload.state.num;

    DPRINT("State.index: %d\n", index);
    DPRINT("State.num:   %d\n", num);
    dev->vq[index].last_avail_index = num;

    return 0;
}

static int
vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("Function %s() not implemented yet.\n", __func__);
    return 0;
}

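/*
 * For virtio-net, even queue indices are receive queues and odd ones
 * are transmit queues (vq[0] = RX, vq[1] = TX here). Only TX kick fds
 * are polled: the RX direction is driven by the backend UDP socket
 * instead of by guest kicks.
 */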
static int
vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    uint64_t u64_arg = vmsg->payload.u64;
    int index = u64_arg & VHOST_USER_VRING_IDX_MASK;

    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);

    assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
    assert(vmsg->fd_num == 1);

    dev->vq[index].kick_fd = vmsg->fds[0];
    DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);

    if (index % 2 == 1) {
        /* TX queue. */
        dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd,
                       dev, vubr_kick_cb);

        DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
               dev->vq[index].kick_fd, index);
    }
    return 0;
}

static int
vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    uint64_t u64_arg = vmsg->payload.u64;
    int index = u64_arg & VHOST_USER_VRING_IDX_MASK;

    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
    assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
    assert(vmsg->fd_num == 1);

    dev->vq[index].call_fd = vmsg->fds[0];
    DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);

    return 0;
}

static int
vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
    return 0;
}

static int
vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    /* FIXME: unimplemented */
    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
    return 0;
}

static int
vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    /* FIXME: unimplemented */
    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
    return 0;
}

static int
vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("Function %s() not implemented yet.\n", __func__);
    return 0;
}

static int
vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("Function %s() not implemented yet.\n", __func__);
    return 0;
}

static int
vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("Function %s() not implemented yet.\n", __func__);
    return 0;
}

static int
vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg)
{
    /* Print out generic part of the request. */
    DPRINT(
        "==================   Vhost user message from QEMU   ==================\n");
    DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request],
           vmsg->request);
    DPRINT("Flags:   0x%x\n", vmsg->flags);
    DPRINT("Size:    %d\n", vmsg->size);

    if (vmsg->fd_num) {
        int i;
        DPRINT("Fds:");
        for (i = 0; i < vmsg->fd_num; i++) {
            DPRINT(" %d", vmsg->fds[i]);
        }
        DPRINT("\n");
    }

    switch (vmsg->request) {
    case VHOST_USER_NONE:
        return vubr_none_exec(dev, vmsg);
    case VHOST_USER_GET_FEATURES:
        return vubr_get_features_exec(dev, vmsg);
    case VHOST_USER_SET_FEATURES:
        return vubr_set_features_exec(dev, vmsg);
    case VHOST_USER_SET_OWNER:
        return vubr_set_owner_exec(dev, vmsg);
    case VHOST_USER_RESET_DEVICE:
        return vubr_reset_device_exec(dev, vmsg);
    case VHOST_USER_SET_MEM_TABLE:
        return vubr_set_mem_table_exec(dev, vmsg);
    case VHOST_USER_SET_LOG_BASE:
        return vubr_set_log_base_exec(dev, vmsg);
    case VHOST_USER_SET_LOG_FD:
        return vubr_set_log_fd_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_NUM:
        return vubr_set_vring_num_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ADDR:
        return vubr_set_vring_addr_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_BASE:
        return vubr_set_vring_base_exec(dev, vmsg);
    case VHOST_USER_GET_VRING_BASE:
        return vubr_get_vring_base_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_KICK:
        return vubr_set_vring_kick_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_CALL:
        return vubr_set_vring_call_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ERR:
        return vubr_set_vring_err_exec(dev, vmsg);
    case VHOST_USER_GET_PROTOCOL_FEATURES:
        return vubr_get_protocol_features_exec(dev, vmsg);
    case VHOST_USER_SET_PROTOCOL_FEATURES:
        return vubr_set_protocol_features_exec(dev, vmsg);
    case VHOST_USER_GET_QUEUE_NUM:
        return vubr_get_queue_num_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ENABLE:
        return vubr_set_vring_enable_exec(dev, vmsg);
    case VHOST_USER_SEND_RARP:
        return vubr_send_rarp_exec(dev, vmsg);

    case VHOST_USER_MAX:
        assert(vmsg->request != VHOST_USER_MAX);
    }
    return 0;
}

static void
vubr_receive_cb(int sock, void *ctx)
{
    VubrDev *dev = (VubrDev *) ctx;
    VhostUserMsg vmsg;
    int reply_requested;

    vubr_message_read(sock, &vmsg);
    reply_requested = vubr_execute_request(dev, &vmsg);
    if (reply_requested) {
        /* Set the version in the flags when sending the reply. */
        vmsg.flags &= ~VHOST_USER_VERSION_MASK;
        vmsg.flags |= VHOST_USER_VERSION;
        vmsg.flags |= VHOST_USER_REPLY_MASK;
        vubr_message_write(sock, &vmsg);
    }
}

static void
vubr_accept_cb(int sock, void *ctx)
{
    VubrDev *dev = (VubrDev *)ctx;
    int conn_fd;
    struct sockaddr_un un;
    socklen_t len = sizeof(un);

    conn_fd = accept(sock, (struct sockaddr *) &un, &len);
    if (conn_fd == -1) {
        vubr_die("accept()");
    }
    DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
    dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
}

static VubrDev *
vubr_new(const char *path)
{
    VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
    int i;
    struct sockaddr_un un;
    size_t len;

    if (!dev) {
        vubr_die("calloc");
    }
    dev->nregions = 0;

    for (i = 0; i < MAX_NR_VIRTQUEUE; i++) {
        dev->vq[i] = (VubrVirtq) {
            .call_fd = -1, .kick_fd = -1,
            .size = 0,
            .last_avail_index = 0, .last_used_index = 0,
            .desc = 0, .avail = 0, .used = 0,
        };
    }

    /* Get a UNIX socket. */
    dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (dev->sock == -1) {
        vubr_die("socket");
    }

    /* Guard against overflowing the fixed-size sun_path buffer. */
    assert(strlen(path) < sizeof(un.sun_path));
    un.sun_family = AF_UNIX;
    strcpy(un.sun_path, path);
    len = sizeof(un.sun_family) + strlen(path);
    unlink(path);

    if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
        vubr_die("bind");
    }

    if (listen(dev->sock, 1) == -1) {
        vubr_die("listen");
    }

    dispatcher_init(&dev->dispatcher);
    dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev,
                   vubr_accept_cb);

    DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
    return dev;
}

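/*
 * The "network" behind the bridge is a pair of UDP endpoints: frames
 * from the guest (with the virtio-net header stripped) are sent as
 * datagrams to dest_host:dest_port, and datagrams arriving on
 * local_host:local_port are injected into the guest's receive virtq.
 */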
static void
vubr_backend_udp_setup(VubrDev *dev,
                       const char *local_host,
                       uint16_t local_port,
                       const char *dest_host,
                       uint16_t dest_port)
{
    int sock;
    struct sockaddr_in si_local = {
        .sin_family = AF_INET,
        .sin_port = htons(local_port),
    };

    if (inet_aton(local_host, &si_local.sin_addr) == 0) {
        fprintf(stderr, "inet_aton() failed.\n");
        exit(1);
    }

    /* Set up the destination address for sends. */
    dev->backend_udp_dest = (struct sockaddr_in) {
        .sin_family = AF_INET,
        .sin_port = htons(dest_port),
    };
    if (inet_aton(dest_host, &dev->backend_udp_dest.sin_addr) == 0) {
        fprintf(stderr, "inet_aton() failed.\n");
        exit(1);
    }

    sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (sock == -1) {
        vubr_die("socket");
    }

    if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) {
        vubr_die("bind");
    }

    dev->backend_udp_sock = sock;
    dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb);
    DPRINT("Waiting for data from udp backend on %s:%d...\n",
           local_host, local_port);
}

static void
vubr_run(VubrDev *dev)
{
    while (1) {
        /* timeout 200ms */
        dispatcher_wait(&dev->dispatcher, 200000);
        /* Here one can try polling strategy. */
    }
}

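/*
 * The socket path and UDP endpoints below are hard-coded for now (see
 * the TODO list at the top of this file). Once they can be given on
 * the command line, two bridges with mirrored UDP port pairs could be
 * pointed at each other to connect two guests.
 */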
int
main(int argc, char *argv[])
{
    VubrDev *dev;

    dev = vubr_new("/tmp/vubr.sock");
    if (!dev) {
        return 1;
    }

    vubr_backend_udp_setup(dev,
                           "127.0.0.1", 4444,
                           "127.0.0.1", 5555);
    vubr_run(dev);
    return 0;
}