]>
Commit | Line | Data |
---|---|---|
3595e2eb VK |
1 | /* |
2 | * Vhost User Bridge | |
3 | * | |
4 | * Copyright (c) 2015 Red Hat, Inc. | |
5 | * | |
6 | * Authors: | |
7 | * Victor Kaplansky <victork@redhat.com> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or | |
10 | * later. See the COPYING file in the top-level directory. | |
11 | */ | |
12 | ||
13 | /* | |
14 | * TODO: | |
15 | * - main should get parameters from the command line. | |
5c93c473 VK |
16 | * - implement all request handlers. Still not implemented: |
17 | * vubr_get_queue_num_exec() | |
18 | * vubr_send_rarp_exec() | |
3595e2eb VK |
19 | * - test for broken requests and virtqueue. |
20 | * - implement features defined by Virtio 1.0 spec. | |
21 | * - support mergeable buffers and indirect descriptors. | |
3595e2eb VK |
22 | * - implement clean shutdown. |
23 | * - implement non-blocking writes to UDP backend. | |
24 | * - implement polling strategy. | |
5c93c473 VK |
25 | * - implement clean starting/stopping of vq processing |
26 | * - implement clean starting/stopping of used and buffers | |
27 | * dirty page logging. | |
3595e2eb VK |
28 | */ |
29 | ||
5c93c473 VK |
30 | #define _FILE_OFFSET_BITS 64 |
31 | ||
681c28a3 | 32 | #include "qemu/osdep.h" |
b7d89466 | 33 | #include "qemu/atomic.h" |
856dfd8a | 34 | #include "qemu/ctype.h" |
e10e798c | 35 | #include "qemu/iov.h" |
3595e2eb | 36 | #include "standard-headers/linux/virtio_net.h" |
0df750e9 | 37 | #include "libvhost-user.h" |
3595e2eb VK |
38 | |
/* Set to 0 to silence every DPRINT() trace in this file. */
#define VHOST_USER_BRIDGE_DEBUG 1

/*
 * printf()-style trace helper.  The arguments are always compiled (so
 * format strings stay checked) but only printed when
 * VHOST_USER_BRIDGE_DEBUG is non-zero.
 */
#define DPRINT(...)                                 \
    do {                                            \
        if (!(VHOST_USER_BRIDGE_DEBUG)) {           \
            break;                                  \
        }                                           \
        printf(__VA_ARGS__);                        \
    } while (0)
47 | ||
6f5fd837 SH |
/*
 * Number of virtqueues handed to vu_init(); also the number of
 * per-queue pages reserved for the host notifier area.
 */
enum {
    VHOST_USER_BRIDGE_MAX_QUEUES = 8,
};
51 | ||
3595e2eb VK |
/*
 * Handler invoked by the dispatcher: receives the descriptor that became
 * readable and the context pointer registered together with it.
 */
typedef void (*CallbackFunc)(int sock, void *ctx);

/* One dispatcher slot: the handler for a descriptor and its argument. */
typedef struct Event {
    void *ctx;              /* opaque pointer passed back to callback */
    CallbackFunc callback;  /* called when the descriptor is readable */
} Event;
58 | ||
59 | typedef struct Dispatcher { | |
60 | int max_sock; | |
61 | fd_set fdset; | |
62 | Event events[FD_SETSIZE]; | |
63 | } Dispatcher; | |
64 | ||
e10e798c MAL |
65 | typedef struct VubrDev { |
66 | VuDev vudev; | |
67 | Dispatcher dispatcher; | |
68 | int backend_udp_sock; | |
69 | struct sockaddr_in backend_udp_dest; | |
70 | int hdrlen; | |
71 | int sock; | |
72 | int ready; | |
73 | int quit; | |
e3af2928 TB |
74 | struct { |
75 | int fd; | |
76 | void *addr; | |
77 | pthread_t thread; | |
78 | } notifier; | |
e10e798c MAL |
79 | } VubrDev; |
80 | ||
3595e2eb VK |
/*
 * Report a fatal error: print @s followed by the description of the
 * current errno, then terminate the process with status 1.
 */
static void
vubr_die(const char *s)
{
    perror(s);
    exit(1);
}
87 | ||
88 | static int | |
89 | dispatcher_init(Dispatcher *dispr) | |
90 | { | |
91 | FD_ZERO(&dispr->fdset); | |
92 | dispr->max_sock = -1; | |
93 | return 0; | |
94 | } | |
95 | ||
96 | static int | |
97 | dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb) | |
98 | { | |
99 | if (sock >= FD_SETSIZE) { | |
100 | fprintf(stderr, | |
101 | "Error: Failed to add new event. sock %d should be less than %d\n", | |
102 | sock, FD_SETSIZE); | |
103 | return -1; | |
104 | } | |
105 | ||
106 | dispr->events[sock].ctx = ctx; | |
107 | dispr->events[sock].callback = cb; | |
108 | ||
109 | FD_SET(sock, &dispr->fdset); | |
110 | if (sock > dispr->max_sock) { | |
111 | dispr->max_sock = sock; | |
112 | } | |
113 | DPRINT("Added sock %d for watching. max_sock: %d\n", | |
114 | sock, dispr->max_sock); | |
115 | return 0; | |
116 | } | |
117 | ||
3595e2eb VK |
118 | static int |
119 | dispatcher_remove(Dispatcher *dispr, int sock) | |
120 | { | |
121 | if (sock >= FD_SETSIZE) { | |
122 | fprintf(stderr, | |
123 | "Error: Failed to remove event. sock %d should be less than %d\n", | |
124 | sock, FD_SETSIZE); | |
125 | return -1; | |
126 | } | |
127 | ||
128 | FD_CLR(sock, &dispr->fdset); | |
6d0b908a | 129 | DPRINT("Sock %d removed from dispatcher watch.\n", sock); |
3595e2eb VK |
130 | return 0; |
131 | } | |
3595e2eb VK |
132 | |
133 | /* timeout in us */ | |
134 | static int | |
135 | dispatcher_wait(Dispatcher *dispr, uint32_t timeout) | |
136 | { | |
137 | struct timeval tv; | |
138 | tv.tv_sec = timeout / 1000000; | |
139 | tv.tv_usec = timeout % 1000000; | |
140 | ||
141 | fd_set fdset = dispr->fdset; | |
142 | ||
143 | /* wait until some of sockets become readable. */ | |
144 | int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv); | |
145 | ||
146 | if (rc == -1) { | |
147 | vubr_die("select"); | |
148 | } | |
149 | ||
150 | /* Timeout */ | |
151 | if (rc == 0) { | |
152 | return 0; | |
153 | } | |
154 | ||
155 | /* Now call callback for every ready socket. */ | |
156 | ||
157 | int sock; | |
6d0b908a VK |
158 | for (sock = 0; sock < dispr->max_sock + 1; sock++) { |
159 | /* The callback on a socket can remove other sockets from the | |
160 | * dispatcher, thus we have to check that the socket is | |
161 | * still not removed from dispatcher's list | |
162 | */ | |
163 | if (FD_ISSET(sock, &fdset) && FD_ISSET(sock, &dispr->fdset)) { | |
3595e2eb VK |
164 | Event *e = &dispr->events[sock]; |
165 | e->callback(sock, e->ctx); | |
166 | } | |
6d0b908a | 167 | } |
3595e2eb VK |
168 | |
169 | return 0; | |
170 | } | |
171 | ||
3595e2eb | 172 | static void |
e10e798c | 173 | vubr_handle_tx(VuDev *dev, int qidx) |
3595e2eb | 174 | { |
e10e798c MAL |
175 | VuVirtq *vq = vu_get_queue(dev, qidx); |
176 | VubrDev *vubr = container_of(dev, VubrDev, vudev); | |
177 | int hdrlen = vubr->hdrlen; | |
178 | VuVirtqElement *elem = NULL; | |
3595e2eb | 179 | |
e10e798c | 180 | assert(qidx % 2); |
3595e2eb | 181 | |
e10e798c MAL |
182 | for (;;) { |
183 | ssize_t ret; | |
184 | unsigned int out_num; | |
185 | struct iovec sg[VIRTQUEUE_MAX_SIZE], *out_sg; | |
3595e2eb | 186 | |
e10e798c MAL |
187 | elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); |
188 | if (!elem) { | |
189 | break; | |
3595e2eb | 190 | } |
3595e2eb | 191 | |
e10e798c MAL |
192 | out_num = elem->out_num; |
193 | out_sg = elem->out_sg; | |
194 | if (out_num < 1) { | |
195 | fprintf(stderr, "virtio-net header not in first element\n"); | |
3595e2eb VK |
196 | break; |
197 | } | |
e10e798c MAL |
198 | if (VHOST_USER_BRIDGE_DEBUG) { |
199 | iov_hexdump(out_sg, out_num, stderr, "TX:", 1024); | |
5c93c473 | 200 | } |
e10e798c MAL |
201 | |
202 | if (hdrlen) { | |
203 | unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), | |
204 | out_sg, out_num, | |
205 | hdrlen, -1); | |
206 | out_num = sg_num; | |
207 | out_sg = sg; | |
3595e2eb VK |
208 | } |
209 | ||
e10e798c MAL |
210 | struct msghdr msg = { |
211 | .msg_name = (struct sockaddr *) &vubr->backend_udp_dest, | |
212 | .msg_namelen = sizeof(struct sockaddr_in), | |
213 | .msg_iov = out_sg, | |
214 | .msg_iovlen = out_num, | |
215 | }; | |
216 | do { | |
217 | ret = sendmsg(vubr->backend_udp_sock, &msg, 0); | |
218 | } while (ret == -1 && (errno == EAGAIN || errno == EINTR)); | |
3595e2eb | 219 | |
e10e798c MAL |
220 | if (ret == -1) { |
221 | vubr_die("sendmsg()"); | |
222 | } | |
3595e2eb | 223 | |
e10e798c MAL |
224 | vu_queue_push(dev, vq, elem, 0); |
225 | vu_queue_notify(dev, vq); | |
3595e2eb | 226 | |
e10e798c MAL |
227 | free(elem); |
228 | elem = NULL; | |
3595e2eb | 229 | } |
3595e2eb | 230 | |
e10e798c | 231 | free(elem); |
3595e2eb VK |
232 | } |
233 | ||
277238f9 MAL |
234 | |
/*
 * Undo a previous iov_discard_front().
 *
 * @front: first entry of the original, undiscarded vector
 * @iov:   entry the vector started at after the discard
 * @bytes: number of bytes that were discarded
 *
 * Entries in [@front, @iov) are assumed to have been skipped whole; the
 * remainder of @bytes is given back to @iov by moving its base backwards
 * and growing its length.
 */
static void
iov_restore_front(struct iovec *front, struct iovec *iov, size_t bytes)
{
    struct iovec *cur;

    for (cur = front; cur != iov; cur++) {
        assert(bytes >= cur->iov_len);
        bytes -= cur->iov_len;
    }

    /* Byte arithmetic through char *: void * arithmetic is not standard C. */
    cur->iov_base = (char *)cur->iov_base - bytes;
    cur->iov_len += bytes;
}
252 | ||
/*
 * Shrink the vector so that it describes exactly its first @bytes bytes.
 *
 * @iov:   vector to shrink in place
 * @iovc:  number of entries in @iov
 * @bytes: total length to keep
 *
 * The entry containing the cut is shortened and every later entry gets a
 * length of zero.  Asking for exactly the current total length is a valid
 * no-op; asking for more than the vector holds trips the assertion.
 */
static void
iov_truncate(struct iovec *iov, unsigned iovc, size_t bytes)
{
    unsigned i;

    for (i = 0; i < iovc; i++) {
        if (bytes < iov[i].iov_len) {
            /* Cut point (or an entry past it): keep only what is left. */
            iov[i].iov_len = bytes;
            bytes = 0;
        } else {
            bytes -= iov[i].iov_len;
        }
    }

    assert(bytes == 0 && "couldn't truncate iov");
}
269 | ||
5c93c473 | 270 | static void |
e10e798c | 271 | vubr_backend_recv_cb(int sock, void *ctx) |
5c93c473 | 272 | { |
e10e798c MAL |
273 | VubrDev *vubr = (VubrDev *) ctx; |
274 | VuDev *dev = &vubr->vudev; | |
275 | VuVirtq *vq = vu_get_queue(dev, 0); | |
276 | VuVirtqElement *elem = NULL; | |
277 | struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; | |
278 | struct virtio_net_hdr_mrg_rxbuf mhdr; | |
279 | unsigned mhdr_cnt = 0; | |
280 | int hdrlen = vubr->hdrlen; | |
281 | int i = 0; | |
282 | struct virtio_net_hdr hdr = { | |
283 | .flags = 0, | |
284 | .gso_type = VIRTIO_NET_HDR_GSO_NONE | |
285 | }; | |
5c93c473 | 286 | |
e10e798c MAL |
287 | DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n"); |
288 | DPRINT(" hdrlen = %d\n", hdrlen); | |
5c93c473 | 289 | |
e10e798c | 290 | if (!vu_queue_enabled(dev, vq) || |
12176528 | 291 | !vu_queue_started(dev, vq) || |
e10e798c MAL |
292 | !vu_queue_avail_bytes(dev, vq, hdrlen, 0)) { |
293 | DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n"); | |
5c93c473 VK |
294 | return; |
295 | } | |
296 | ||
241187c1 | 297 | while (1) { |
e10e798c MAL |
298 | struct iovec *sg; |
299 | ssize_t ret, total = 0; | |
300 | unsigned int num; | |
3595e2eb | 301 | |
e10e798c MAL |
302 | elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); |
303 | if (!elem) { | |
a28c393c VK |
304 | break; |
305 | } | |
306 | ||
e10e798c MAL |
307 | if (elem->in_num < 1) { |
308 | fprintf(stderr, "virtio-net contains no in buffers\n"); | |
3595e2eb VK |
309 | break; |
310 | } | |
311 | ||
e10e798c MAL |
312 | sg = elem->in_sg; |
313 | num = elem->in_num; | |
314 | if (i == 0) { | |
315 | if (hdrlen == 12) { | |
316 | mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg), | |
317 | sg, elem->in_num, | |
318 | offsetof(typeof(mhdr), num_buffers), | |
319 | sizeof(mhdr.num_buffers)); | |
320 | } | |
321 | iov_from_buf(sg, elem->in_num, 0, &hdr, sizeof hdr); | |
322 | total += hdrlen; | |
277238f9 MAL |
323 | ret = iov_discard_front(&sg, &num, hdrlen); |
324 | assert(ret == hdrlen); | |
3595e2eb VK |
325 | } |
326 | ||
e10e798c MAL |
327 | struct msghdr msg = { |
328 | .msg_name = (struct sockaddr *) &vubr->backend_udp_dest, | |
329 | .msg_namelen = sizeof(struct sockaddr_in), | |
330 | .msg_iov = sg, | |
8f1d22d9 | 331 | .msg_iovlen = num, |
e10e798c MAL |
332 | .msg_flags = MSG_DONTWAIT, |
333 | }; | |
334 | do { | |
335 | ret = recvmsg(vubr->backend_udp_sock, &msg, 0); | |
336 | } while (ret == -1 && (errno == EINTR)); | |
3595e2eb | 337 | |
e10e798c MAL |
338 | if (i == 0) { |
339 | iov_restore_front(elem->in_sg, sg, hdrlen); | |
340 | } | |
3595e2eb | 341 | |
e10e798c MAL |
342 | if (ret == -1) { |
343 | if (errno == EWOULDBLOCK) { | |
344 | vu_queue_rewind(dev, vq, 1); | |
345 | break; | |
346 | } | |
3595e2eb | 347 | |
e10e798c MAL |
348 | vubr_die("recvmsg()"); |
349 | } | |
5c93c473 | 350 | |
e10e798c MAL |
351 | total += ret; |
352 | iov_truncate(elem->in_sg, elem->in_num, total); | |
353 | vu_queue_fill(dev, vq, elem, total, i++); | |
3595e2eb | 354 | |
e10e798c MAL |
355 | free(elem); |
356 | elem = NULL; | |
241187c1 EB |
357 | |
358 | break; /* could loop if DONTWAIT worked? */ | |
359 | } | |
3595e2eb | 360 | |
e10e798c MAL |
361 | if (mhdr_cnt) { |
362 | mhdr.num_buffers = i; | |
363 | iov_from_buf(mhdr_sg, mhdr_cnt, | |
364 | 0, | |
365 | &mhdr.num_buffers, sizeof mhdr.num_buffers); | |
3595e2eb VK |
366 | } |
367 | ||
e10e798c MAL |
368 | vu_queue_flush(dev, vq, i); |
369 | vu_queue_notify(dev, vq); | |
3595e2eb | 370 | |
e10e798c | 371 | free(elem); |
3595e2eb VK |
372 | } |
373 | ||
374 | static void | |
e10e798c | 375 | vubr_receive_cb(int sock, void *ctx) |
3595e2eb | 376 | { |
e10e798c | 377 | VubrDev *vubr = (VubrDev *)ctx; |
3595e2eb | 378 | |
e10e798c MAL |
379 | if (!vu_dispatch(&vubr->vudev)) { |
380 | fprintf(stderr, "Error while dispatching\n"); | |
3595e2eb VK |
381 | } |
382 | } | |
383 | ||
e10e798c MAL |
384 | typedef struct WatchData { |
385 | VuDev *dev; | |
386 | vu_watch_cb cb; | |
387 | void *data; | |
388 | } WatchData; | |
3595e2eb | 389 | |
e10e798c MAL |
390 | static void |
391 | watch_cb(int sock, void *ctx) | |
3595e2eb | 392 | { |
e10e798c | 393 | struct WatchData *wd = ctx; |
3595e2eb | 394 | |
e10e798c | 395 | wd->cb(wd->dev, VU_WATCH_IN, wd->data); |
3595e2eb VK |
396 | } |
397 | ||
e10e798c MAL |
398 | static void |
399 | vubr_set_watch(VuDev *dev, int fd, int condition, | |
400 | vu_watch_cb cb, void *data) | |
3595e2eb | 401 | { |
e10e798c MAL |
402 | VubrDev *vubr = container_of(dev, VubrDev, vudev); |
403 | static WatchData watches[FD_SETSIZE]; | |
404 | struct WatchData *wd = &watches[fd]; | |
a28c393c | 405 | |
e10e798c MAL |
406 | wd->cb = cb; |
407 | wd->data = data; | |
408 | wd->dev = dev; | |
409 | dispatcher_add(&vubr->dispatcher, fd, wd, watch_cb); | |
3595e2eb VK |
410 | } |
411 | ||
5c93c473 | 412 | static void |
e10e798c | 413 | vubr_remove_watch(VuDev *dev, int fd) |
5c93c473 | 414 | { |
e10e798c | 415 | VubrDev *vubr = container_of(dev, VubrDev, vudev); |
5c93c473 | 416 | |
e10e798c | 417 | dispatcher_remove(&vubr->dispatcher, fd); |
3595e2eb VK |
418 | } |
419 | ||
420 | static int | |
e10e798c | 421 | vubr_send_rarp_exec(VuDev *dev, VhostUserMsg *vmsg) |
3595e2eb | 422 | { |
e10e798c | 423 | DPRINT("Function %s() not implemented yet.\n", __func__); |
3595e2eb VK |
424 | return 0; |
425 | } | |
426 | ||
427 | static int | |
e10e798c | 428 | vubr_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply) |
3595e2eb | 429 | { |
e10e798c MAL |
430 | switch (vmsg->request) { |
431 | case VHOST_USER_SEND_RARP: | |
432 | *do_reply = vubr_send_rarp_exec(dev, vmsg); | |
433 | return 1; | |
434 | default: | |
435 | /* let the library handle the rest */ | |
436 | return 0; | |
5c93c473 | 437 | } |
5c93c473 | 438 | |
3595e2eb VK |
439 | return 0; |
440 | } | |
441 | ||
e10e798c MAL |
442 | static void |
443 | vubr_set_features(VuDev *dev, uint64_t features) | |
3595e2eb | 444 | { |
e10e798c | 445 | VubrDev *vubr = container_of(dev, VubrDev, vudev); |
3595e2eb | 446 | |
e10e798c MAL |
447 | if ((features & (1ULL << VIRTIO_F_VERSION_1)) || |
448 | (features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) { | |
449 | vubr->hdrlen = 12; | |
450 | } else { | |
451 | vubr->hdrlen = 10; | |
523b018d | 452 | } |
3595e2eb VK |
453 | } |
454 | ||
e10e798c MAL |
455 | static uint64_t |
456 | vubr_get_features(VuDev *dev) | |
3595e2eb | 457 | { |
e10e798c | 458 | return 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE | |
e3af2928 TB |
459 | 1ULL << VIRTIO_NET_F_MRG_RXBUF | |
460 | 1ULL << VIRTIO_F_VERSION_1; | |
3595e2eb VK |
461 | } |
462 | ||
e10e798c MAL |
463 | static void |
464 | vubr_queue_set_started(VuDev *dev, int qidx, bool started) | |
3595e2eb | 465 | { |
e3af2928 | 466 | VubrDev *vubr = container_of(dev, VubrDev, vudev); |
e10e798c | 467 | VuVirtq *vq = vu_get_queue(dev, qidx); |
3595e2eb | 468 | |
e3af2928 TB |
469 | if (started && vubr->notifier.fd >= 0) { |
470 | vu_set_queue_host_notifier(dev, vq, vubr->notifier.fd, | |
8e3b0cbb MAL |
471 | qemu_real_host_page_size(), |
472 | qidx * qemu_real_host_page_size()); | |
e3af2928 TB |
473 | } |
474 | ||
e10e798c MAL |
475 | if (qidx % 2 == 1) { |
476 | vu_set_queue_handler(dev, vq, started ? vubr_handle_tx : NULL); | |
6d0b908a | 477 | } |
3595e2eb VK |
478 | } |
479 | ||
e10e798c MAL |
480 | static void |
481 | vubr_panic(VuDev *dev, const char *msg) | |
3595e2eb | 482 | { |
e10e798c | 483 | VubrDev *vubr = container_of(dev, VubrDev, vudev); |
5c93c473 | 484 | |
e10e798c | 485 | fprintf(stderr, "PANIC: %s\n", msg); |
3595e2eb | 486 | |
e10e798c MAL |
487 | dispatcher_remove(&vubr->dispatcher, dev->sock); |
488 | vubr->quit = 1; | |
3595e2eb VK |
489 | } |
490 | ||
672339f7 MAL |
491 | static bool |
492 | vubr_queue_is_processed_in_order(VuDev *dev, int qidx) | |
493 | { | |
494 | return true; | |
495 | } | |
496 | ||
e10e798c MAL |
497 | static const VuDevIface vuiface = { |
498 | .get_features = vubr_get_features, | |
499 | .set_features = vubr_set_features, | |
500 | .process_msg = vubr_process_msg, | |
501 | .queue_set_started = vubr_queue_set_started, | |
672339f7 | 502 | .queue_is_processed_in_order = vubr_queue_is_processed_in_order, |
e10e798c | 503 | }; |
3595e2eb VK |
504 | |
505 | static void | |
506 | vubr_accept_cb(int sock, void *ctx) | |
507 | { | |
508 | VubrDev *dev = (VubrDev *)ctx; | |
509 | int conn_fd; | |
510 | struct sockaddr_un un; | |
511 | socklen_t len = sizeof(un); | |
512 | ||
513 | conn_fd = accept(sock, (struct sockaddr *) &un, &len); | |
5c93c473 | 514 | if (conn_fd == -1) { |
3595e2eb VK |
515 | vubr_die("accept()"); |
516 | } | |
517 | DPRINT("Got connection from remote peer on sock %d\n", conn_fd); | |
e10e798c | 518 | |
6f5fd837 SH |
519 | if (!vu_init(&dev->vudev, |
520 | VHOST_USER_BRIDGE_MAX_QUEUES, | |
521 | conn_fd, | |
522 | vubr_panic, | |
049f5550 | 523 | NULL, |
6f5fd837 SH |
524 | vubr_set_watch, |
525 | vubr_remove_watch, | |
526 | &vuiface)) { | |
527 | fprintf(stderr, "Failed to initialize libvhost-user\n"); | |
528 | exit(1); | |
529 | } | |
e10e798c | 530 | |
3595e2eb | 531 | dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb); |
98206d4e | 532 | dispatcher_remove(&dev->dispatcher, sock); |
3595e2eb VK |
533 | } |
534 | ||
535 | static VubrDev * | |
aef8486e | 536 | vubr_new(const char *path, bool client) |
3595e2eb VK |
537 | { |
538 | VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev)); | |
3595e2eb | 539 | struct sockaddr_un un; |
aef8486e | 540 | CallbackFunc cb; |
3595e2eb VK |
541 | size_t len; |
542 | ||
f8843514 PM |
543 | if (strlen(path) >= sizeof(un.sun_path)) { |
544 | fprintf(stderr, "unix domain socket path '%s' is too long\n", path); | |
545 | exit(1); | |
546 | } | |
547 | ||
3595e2eb VK |
548 | /* Get a UNIX socket. */ |
549 | dev->sock = socket(AF_UNIX, SOCK_STREAM, 0); | |
550 | if (dev->sock == -1) { | |
551 | vubr_die("socket"); | |
552 | } | |
553 | ||
e3af2928 TB |
554 | dev->notifier.fd = -1; |
555 | ||
3595e2eb VK |
556 | un.sun_family = AF_UNIX; |
557 | strcpy(un.sun_path, path); | |
558 | len = sizeof(un.sun_family) + strlen(path); | |
3595e2eb | 559 | |
aef8486e MAL |
560 | if (!client) { |
561 | unlink(path); | |
3595e2eb | 562 | |
aef8486e MAL |
563 | if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) { |
564 | vubr_die("bind"); | |
565 | } | |
566 | ||
567 | if (listen(dev->sock, 1) == -1) { | |
568 | vubr_die("listen"); | |
569 | } | |
570 | cb = vubr_accept_cb; | |
3595e2eb | 571 | |
aef8486e MAL |
572 | DPRINT("Waiting for connections on UNIX socket %s ...\n", path); |
573 | } else { | |
574 | if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) { | |
575 | vubr_die("connect"); | |
576 | } | |
6f5fd837 SH |
577 | |
578 | if (!vu_init(&dev->vudev, | |
579 | VHOST_USER_BRIDGE_MAX_QUEUES, | |
580 | dev->sock, | |
581 | vubr_panic, | |
049f5550 | 582 | NULL, |
6f5fd837 SH |
583 | vubr_set_watch, |
584 | vubr_remove_watch, | |
585 | &vuiface)) { | |
586 | fprintf(stderr, "Failed to initialize libvhost-user\n"); | |
587 | exit(1); | |
588 | } | |
589 | ||
aef8486e | 590 | cb = vubr_receive_cb; |
3595e2eb VK |
591 | } |
592 | ||
593 | dispatcher_init(&dev->dispatcher); | |
e10e798c | 594 | |
aef8486e | 595 | dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb); |
3595e2eb | 596 | |
3595e2eb VK |
597 | return dev; |
598 | } | |
599 | ||
e3af2928 TB |
600 | static void *notifier_thread(void *arg) |
601 | { | |
602 | VuDev *dev = (VuDev *)arg; | |
603 | VubrDev *vubr = container_of(dev, VubrDev, vudev); | |
8e3b0cbb | 604 | int pagesize = qemu_real_host_page_size(); |
e3af2928 TB |
605 | int qidx; |
606 | ||
607 | while (true) { | |
6f5fd837 | 608 | for (qidx = 0; qidx < VHOST_USER_BRIDGE_MAX_QUEUES; qidx++) { |
e3af2928 TB |
609 | uint16_t *n = vubr->notifier.addr + pagesize * qidx; |
610 | ||
611 | if (*n == qidx) { | |
612 | *n = 0xffff; | |
613 | /* We won't miss notifications if we reset | |
614 | * the memory first. */ | |
615 | smp_mb(); | |
616 | ||
617 | DPRINT("Got a notification for queue%d via host notifier.\n", | |
618 | qidx); | |
619 | ||
620 | if (qidx % 2 == 1) { | |
621 | vubr_handle_tx(dev, qidx); | |
622 | } | |
623 | } | |
624 | usleep(1000); | |
625 | } | |
626 | } | |
627 | ||
628 | return NULL; | |
629 | } | |
630 | ||
631 | static void | |
632 | vubr_host_notifier_setup(VubrDev *dev) | |
633 | { | |
634 | char template[] = "/tmp/vubr-XXXXXX"; | |
635 | pthread_t thread; | |
636 | size_t length; | |
637 | void *addr; | |
638 | int fd; | |
639 | ||
8e3b0cbb | 640 | length = qemu_real_host_page_size() * VHOST_USER_BRIDGE_MAX_QUEUES; |
e3af2928 TB |
641 | |
642 | fd = mkstemp(template); | |
643 | if (fd < 0) { | |
644 | vubr_die("mkstemp()"); | |
645 | } | |
646 | ||
647 | if (posix_fallocate(fd, 0, length) != 0) { | |
648 | vubr_die("posix_fallocate()"); | |
649 | } | |
650 | ||
651 | addr = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | |
652 | if (addr == MAP_FAILED) { | |
653 | vubr_die("mmap()"); | |
654 | } | |
655 | ||
656 | memset(addr, 0xff, length); | |
657 | ||
658 | if (pthread_create(&thread, NULL, notifier_thread, &dev->vudev) != 0) { | |
659 | vubr_die("pthread_create()"); | |
660 | } | |
661 | ||
662 | dev->notifier.fd = fd; | |
663 | dev->notifier.addr = addr; | |
664 | dev->notifier.thread = thread; | |
665 | } | |
666 | ||
7cf32491 VK |
667 | static void |
668 | vubr_set_host(struct sockaddr_in *saddr, const char *host) | |
669 | { | |
d18dc3af | 670 | if (qemu_isdigit(host[0])) { |
7cf32491 VK |
671 | if (!inet_aton(host, &saddr->sin_addr)) { |
672 | fprintf(stderr, "inet_aton() failed.\n"); | |
673 | exit(1); | |
674 | } | |
675 | } else { | |
676 | struct hostent *he = gethostbyname(host); | |
677 | ||
678 | if (!he) { | |
679 | fprintf(stderr, "gethostbyname() failed.\n"); | |
680 | exit(1); | |
681 | } | |
682 | saddr->sin_addr = *(struct in_addr *)he->h_addr; | |
683 | } | |
684 | } | |
685 | ||
3595e2eb VK |
686 | static void |
687 | vubr_backend_udp_setup(VubrDev *dev, | |
688 | const char *local_host, | |
7cf32491 VK |
689 | const char *local_port, |
690 | const char *remote_host, | |
691 | const char *remote_port) | |
3595e2eb VK |
692 | { |
693 | int sock; | |
7cf32491 VK |
694 | const char *r; |
695 | ||
696 | int lport, rport; | |
697 | ||
698 | lport = strtol(local_port, (char **)&r, 0); | |
699 | if (r == local_port) { | |
700 | fprintf(stderr, "lport parsing failed.\n"); | |
701 | exit(1); | |
702 | } | |
703 | ||
704 | rport = strtol(remote_port, (char **)&r, 0); | |
705 | if (r == remote_port) { | |
706 | fprintf(stderr, "rport parsing failed.\n"); | |
707 | exit(1); | |
708 | } | |
709 | ||
3595e2eb VK |
710 | struct sockaddr_in si_local = { |
711 | .sin_family = AF_INET, | |
7cf32491 | 712 | .sin_port = htons(lport), |
3595e2eb VK |
713 | }; |
714 | ||
7cf32491 | 715 | vubr_set_host(&si_local, local_host); |
3595e2eb VK |
716 | |
717 | /* setup destination for sends */ | |
718 | dev->backend_udp_dest = (struct sockaddr_in) { | |
719 | .sin_family = AF_INET, | |
7cf32491 | 720 | .sin_port = htons(rport), |
3595e2eb | 721 | }; |
7cf32491 | 722 | vubr_set_host(&dev->backend_udp_dest, remote_host); |
3595e2eb VK |
723 | |
724 | sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); | |
725 | if (sock == -1) { | |
726 | vubr_die("socket"); | |
727 | } | |
728 | ||
729 | if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) { | |
730 | vubr_die("bind"); | |
731 | } | |
732 | ||
733 | dev->backend_udp_sock = sock; | |
734 | dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb); | |
735 | DPRINT("Waiting for data from udp backend on %s:%d...\n", | |
7cf32491 | 736 | local_host, lport); |
3595e2eb VK |
737 | } |
738 | ||
739 | static void | |
740 | vubr_run(VubrDev *dev) | |
741 | { | |
e10e798c | 742 | while (!dev->quit) { |
3595e2eb VK |
743 | /* timeout 200ms */ |
744 | dispatcher_wait(&dev->dispatcher, 200000); | |
745 | /* Here one can try polling strategy. */ | |
746 | } | |
747 | } | |
748 | ||
7cf32491 VK |
/*
 * Split a "host:port" string at its first ':'.
 *
 * @host: receives a newly allocated copy of the text before the colon
 * @port: receives a newly allocated copy of the text after the colon
 * @buf:  string to split; it is not modified
 *
 * Returns 0 on success.  Returns -1, leaving @host and @port untouched,
 * when @buf contains no ':' or memory cannot be allocated.
 */
static int
vubr_parse_host_port(const char **host, const char **port, const char *buf)
{
    const char *colon = strchr(buf, ':');
    size_t host_len, port_len;
    char *host_copy, *port_copy;

    if (!colon) {
        return -1;
    }

    host_len = (size_t)(colon - buf);
    port_len = strlen(colon + 1);

    host_copy = malloc(host_len + 1);
    port_copy = malloc(port_len + 1);
    if (!host_copy || !port_copy) {
        free(host_copy);
        free(port_copy);
        return -1;
    }

    memcpy(host_copy, buf, host_len);
    host_copy[host_len] = '\0';
    memcpy(port_copy, colon + 1, port_len + 1);    /* includes the NUL */

    *host = host_copy;
    *port = port_copy;
    return 0;
}
762 | ||
/* Built-in endpoints, used when the matching option is not given. */
#define DEFAULT_UD_SOCKET "/tmp/vubr.sock"
#define DEFAULT_LHOST "127.0.0.1"
#define DEFAULT_LPORT "4444"
#define DEFAULT_RHOST "127.0.0.1"
#define DEFAULT_RPORT "5555"

/* Effective settings; main() overrides them from -u, -l and -r. */
static const char *ud_socket_path = DEFAULT_UD_SOCKET;
static const char *lhost = DEFAULT_LHOST;
static const char *lport = DEFAULT_LPORT;
static const char *rhost = DEFAULT_RHOST;
static const char *rport = DEFAULT_RPORT;
774 | ||
3595e2eb VK |
775 | int |
776 | main(int argc, char *argv[]) | |
777 | { | |
778 | VubrDev *dev; | |
7cf32491 | 779 | int opt; |
aef8486e | 780 | bool client = false; |
e3af2928 | 781 | bool host_notifier = false; |
3595e2eb | 782 | |
e3af2928 | 783 | while ((opt = getopt(argc, argv, "l:r:u:cH")) != -1) { |
7cf32491 VK |
784 | |
785 | switch (opt) { | |
786 | case 'l': | |
787 | if (vubr_parse_host_port(&lhost, &lport, optarg) < 0) { | |
788 | goto out; | |
789 | } | |
790 | break; | |
791 | case 'r': | |
792 | if (vubr_parse_host_port(&rhost, &rport, optarg) < 0) { | |
793 | goto out; | |
794 | } | |
795 | break; | |
796 | case 'u': | |
797 | ud_socket_path = strdup(optarg); | |
798 | break; | |
aef8486e MAL |
799 | case 'c': |
800 | client = true; | |
801 | break; | |
e3af2928 TB |
802 | case 'H': |
803 | host_notifier = true; | |
804 | break; | |
7cf32491 VK |
805 | default: |
806 | goto out; | |
807 | } | |
808 | } | |
809 | ||
aef8486e MAL |
810 | DPRINT("ud socket: %s (%s)\n", ud_socket_path, |
811 | client ? "client" : "server"); | |
7cf32491 VK |
812 | DPRINT("local: %s:%s\n", lhost, lport); |
813 | DPRINT("remote: %s:%s\n", rhost, rport); | |
814 | ||
aef8486e | 815 | dev = vubr_new(ud_socket_path, client); |
3595e2eb VK |
816 | if (!dev) { |
817 | return 1; | |
818 | } | |
819 | ||
e3af2928 TB |
820 | if (host_notifier) { |
821 | vubr_host_notifier_setup(dev); | |
822 | } | |
823 | ||
7cf32491 | 824 | vubr_backend_udp_setup(dev, lhost, lport, rhost, rport); |
3595e2eb | 825 | vubr_run(dev); |
e10e798c MAL |
826 | |
827 | vu_deinit(&dev->vudev); | |
828 | ||
3595e2eb | 829 | return 0; |
7cf32491 VK |
830 | |
831 | out: | |
832 | fprintf(stderr, "Usage: %s ", argv[0]); | |
e3af2928 | 833 | fprintf(stderr, "[-c] [-H] [-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n"); |
6b3dc992 | 834 | fprintf(stderr, "\t-u path to unix domain socket. default: %s\n", |
7cf32491 VK |
835 | DEFAULT_UD_SOCKET); |
836 | fprintf(stderr, "\t-l local host and port. default: %s:%s\n", | |
837 | DEFAULT_LHOST, DEFAULT_LPORT); | |
838 | fprintf(stderr, "\t-r remote host and port. default: %s:%s\n", | |
839 | DEFAULT_RHOST, DEFAULT_RPORT); | |
aef8486e | 840 | fprintf(stderr, "\t-c client mode\n"); |
e3af2928 | 841 | fprintf(stderr, "\t-H use host notifier\n"); |
7cf32491 VK |
842 | |
843 | return 1; | |
3595e2eb | 844 | } |