]> git.proxmox.com Git - mirror_qemu.git/blob - net/tap.c
net: tap: using bool instead of bitfield
[mirror_qemu.git] / net / tap.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2009 Red Hat, Inc.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26 #include "tap_int.h"
27
28 #include "config-host.h"
29
30 #include <sys/ioctl.h>
31 #include <sys/stat.h>
32 #include <sys/wait.h>
33 #include <sys/socket.h>
34 #include <net/if.h>
35
36 #include "net/net.h"
37 #include "clients.h"
38 #include "monitor/monitor.h"
39 #include "sysemu/sysemu.h"
40 #include "qemu-common.h"
41 #include "qemu/error-report.h"
42
43 #include "net/tap.h"
44
45 #include "hw/vhost_net.h"
46
/*
 * Size of the per-device packet buffer: one maximum-sized GSO packet (64k)
 * plus plenty of headroom for the ethernet and virtio_net headers.
 */
#define TAP_BUFSIZE (65536 + 4096)
51
52 typedef struct TAPState {
53 NetClientState nc;
54 int fd;
55 char down_script[1024];
56 char down_script_arg[128];
57 uint8_t buf[TAP_BUFSIZE];
58 bool read_poll;
59 bool write_poll;
60 bool using_vnet_hdr;
61 bool has_ufo;
62 VHostNetState *vhost_net;
63 unsigned host_vnet_hdr_len;
64 } TAPState;
65
/* fd handlers, referenced by tap_update_fd_handler() before their bodies */
static int tap_can_send(void *opaque);
static void tap_send(void *opaque);
static void tap_writable(void *opaque);

/* Script runner, used by tap_cleanup() ahead of its definition */
static int launch_script(const char *setup_script, const char *ifname, int fd);
71
72 static void tap_update_fd_handler(TAPState *s)
73 {
74 qemu_set_fd_handler2(s->fd,
75 s->read_poll ? tap_can_send : NULL,
76 s->read_poll ? tap_send : NULL,
77 s->write_poll ? tap_writable : NULL,
78 s);
79 }
80
81 static void tap_read_poll(TAPState *s, bool enable)
82 {
83 s->read_poll = enable;
84 tap_update_fd_handler(s);
85 }
86
87 static void tap_write_poll(TAPState *s, bool enable)
88 {
89 s->write_poll = enable;
90 tap_update_fd_handler(s);
91 }
92
93 static void tap_writable(void *opaque)
94 {
95 TAPState *s = opaque;
96
97 tap_write_poll(s, false);
98
99 qemu_flush_queued_packets(&s->nc);
100 }
101
102 static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
103 {
104 ssize_t len;
105
106 do {
107 len = writev(s->fd, iov, iovcnt);
108 } while (len == -1 && errno == EINTR);
109
110 if (len == -1 && errno == EAGAIN) {
111 tap_write_poll(s, true);
112 return 0;
113 }
114
115 return len;
116 }
117
118 static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov,
119 int iovcnt)
120 {
121 TAPState *s = DO_UPCAST(TAPState, nc, nc);
122 const struct iovec *iovp = iov;
123 struct iovec iov_copy[iovcnt + 1];
124 struct virtio_net_hdr_mrg_rxbuf hdr = { };
125
126 if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
127 iov_copy[0].iov_base = &hdr;
128 iov_copy[0].iov_len = s->host_vnet_hdr_len;
129 memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
130 iovp = iov_copy;
131 iovcnt++;
132 }
133
134 return tap_write_packet(s, iovp, iovcnt);
135 }
136
137 static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size)
138 {
139 TAPState *s = DO_UPCAST(TAPState, nc, nc);
140 struct iovec iov[2];
141 int iovcnt = 0;
142 struct virtio_net_hdr_mrg_rxbuf hdr = { };
143
144 if (s->host_vnet_hdr_len) {
145 iov[iovcnt].iov_base = &hdr;
146 iov[iovcnt].iov_len = s->host_vnet_hdr_len;
147 iovcnt++;
148 }
149
150 iov[iovcnt].iov_base = (char *)buf;
151 iov[iovcnt].iov_len = size;
152 iovcnt++;
153
154 return tap_write_packet(s, iov, iovcnt);
155 }
156
157 static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size)
158 {
159 TAPState *s = DO_UPCAST(TAPState, nc, nc);
160 struct iovec iov[1];
161
162 if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
163 return tap_receive_raw(nc, buf, size);
164 }
165
166 iov[0].iov_base = (char *)buf;
167 iov[0].iov_len = size;
168
169 return tap_write_packet(s, iov, 1);
170 }
171
172 static int tap_can_send(void *opaque)
173 {
174 TAPState *s = opaque;
175
176 return qemu_can_send_packet(&s->nc);
177 }
178
#ifndef __sun__
/*
 * Read one packet of at most @maxlen bytes from @tapfd into @buf.
 * Returns the read() result unchanged.  Not compiled when __sun__ is defined.
 */
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t nread = read(tapfd, buf, maxlen);

    return nread;
}
#endif
185
186 static void tap_send_completed(NetClientState *nc, ssize_t len)
187 {
188 TAPState *s = DO_UPCAST(TAPState, nc, nc);
189 tap_read_poll(s, true);
190 }
191
192 static void tap_send(void *opaque)
193 {
194 TAPState *s = opaque;
195 int size;
196
197 do {
198 uint8_t *buf = s->buf;
199
200 size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
201 if (size <= 0) {
202 break;
203 }
204
205 if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
206 buf += s->host_vnet_hdr_len;
207 size -= s->host_vnet_hdr_len;
208 }
209
210 size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
211 if (size == 0) {
212 tap_read_poll(s, false);
213 }
214 } while (size > 0 && qemu_can_send_packet(&s->nc));
215 }
216
217 bool tap_has_ufo(NetClientState *nc)
218 {
219 TAPState *s = DO_UPCAST(TAPState, nc, nc);
220
221 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
222
223 return s->has_ufo;
224 }
225
226 int tap_has_vnet_hdr(NetClientState *nc)
227 {
228 TAPState *s = DO_UPCAST(TAPState, nc, nc);
229
230 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
231
232 return !!s->host_vnet_hdr_len;
233 }
234
235 int tap_has_vnet_hdr_len(NetClientState *nc, int len)
236 {
237 TAPState *s = DO_UPCAST(TAPState, nc, nc);
238
239 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
240
241 return tap_probe_vnet_hdr_len(s->fd, len);
242 }
243
244 void tap_set_vnet_hdr_len(NetClientState *nc, int len)
245 {
246 TAPState *s = DO_UPCAST(TAPState, nc, nc);
247
248 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
249 assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
250 len == sizeof(struct virtio_net_hdr));
251
252 tap_fd_set_vnet_hdr_len(s->fd, len);
253 s->host_vnet_hdr_len = len;
254 }
255
256 void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
257 {
258 TAPState *s = DO_UPCAST(TAPState, nc, nc);
259
260 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
261 assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
262
263 s->using_vnet_hdr = using_vnet_hdr;
264 }
265
266 void tap_set_offload(NetClientState *nc, int csum, int tso4,
267 int tso6, int ecn, int ufo)
268 {
269 TAPState *s = DO_UPCAST(TAPState, nc, nc);
270 if (s->fd < 0) {
271 return;
272 }
273
274 tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
275 }
276
277 static void tap_cleanup(NetClientState *nc)
278 {
279 TAPState *s = DO_UPCAST(TAPState, nc, nc);
280
281 if (s->vhost_net) {
282 vhost_net_cleanup(s->vhost_net);
283 s->vhost_net = NULL;
284 }
285
286 qemu_purge_queued_packets(nc);
287
288 if (s->down_script[0])
289 launch_script(s->down_script, s->down_script_arg, s->fd);
290
291 tap_read_poll(s, false);
292 tap_write_poll(s, false);
293 close(s->fd);
294 s->fd = -1;
295 }
296
297 static void tap_poll(NetClientState *nc, bool enable)
298 {
299 TAPState *s = DO_UPCAST(TAPState, nc, nc);
300 tap_read_poll(s, enable);
301 tap_write_poll(s, enable);
302 }
303
304 int tap_get_fd(NetClientState *nc)
305 {
306 TAPState *s = DO_UPCAST(TAPState, nc, nc);
307 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
308 return s->fd;
309 }
310
311 /* fd support */
312
313 static NetClientInfo net_tap_info = {
314 .type = NET_CLIENT_OPTIONS_KIND_TAP,
315 .size = sizeof(TAPState),
316 .receive = tap_receive,
317 .receive_raw = tap_receive_raw,
318 .receive_iov = tap_receive_iov,
319 .poll = tap_poll,
320 .cleanup = tap_cleanup,
321 };
322
323 static TAPState *net_tap_fd_init(NetClientState *peer,
324 const char *model,
325 const char *name,
326 int fd,
327 int vnet_hdr)
328 {
329 NetClientState *nc;
330 TAPState *s;
331
332 nc = qemu_new_net_client(&net_tap_info, peer, model, name);
333
334 s = DO_UPCAST(TAPState, nc, nc);
335
336 s->fd = fd;
337 s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
338 s->using_vnet_hdr = false;
339 s->has_ufo = tap_probe_has_ufo(s->fd);
340 tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
341 /*
342 * Make sure host header length is set correctly in tap:
343 * it might have been modified by another instance of qemu.
344 */
345 if (tap_probe_vnet_hdr_len(s->fd, s->host_vnet_hdr_len)) {
346 tap_fd_set_vnet_hdr_len(s->fd, s->host_vnet_hdr_len);
347 }
348 tap_read_poll(s, true);
349 s->vhost_net = NULL;
350 return s;
351 }
352
/*
 * Run @setup_script with @ifname as its only argument and wait for it.
 *
 * The child closes every descriptor except stdin, stdout, stderr and @fd
 * before exec'ing the script.
 *
 * Returns 0 if the script exited with status 0.  Returns -1, after printing
 * a message to stderr, if fork() failed, the script could not be waited for,
 * or it did not exit cleanly with status 0.
 */
static int launch_script(const char *setup_script, const char *ifname, int fd)
{
    int pid, status;
    char *args[3];
    char **parg;

    /* try to launch network script */
    pid = fork();
    if (pid == 0) {
        int open_max = sysconf(_SC_OPEN_MAX), i;

        for (i = 0; i < open_max; i++) {
            if (i != STDIN_FILENO &&
                i != STDOUT_FILENO &&
                i != STDERR_FILENO &&
                i != fd) {
                close(i);
            }
        }
        parg = args;
        *parg++ = (char *)setup_script;
        *parg++ = (char *)ifname;
        *parg = NULL;
        execv(setup_script, args);
        /* Only reached when execv() failed. */
        _exit(1);
    } else if (pid > 0) {
        int ret;

        /*
         * Retry only when interrupted by a signal.  Any other failure
         * (e.g. ECHILD) is permanent, so looping on it would never end
         * and 'status' would stay uninitialised.
         */
        do {
            ret = waitpid(pid, &status, 0);
        } while (ret == -1 && errno == EINTR);

        if (ret == pid && WIFEXITED(status) && WEXITSTATUS(status) == 0) {
            return 0;
        }
    }
    fprintf(stderr, "%s: could not launch network script\n", setup_script);
    return -1;
}
390
/*
 * Receive one file descriptor passed as SCM_RIGHTS ancillary data over the
 * UNIX socket @c.
 *
 * Returns the received descriptor, or -1 with errno set:
 *  - to the recvmsg() error if the call itself failed (callers retry on
 *    EINTR);
 *  - to ENOMSG if the peer closed the socket, or sent data that does not
 *    carry exactly one descriptor.
 */
static int recv_fd(int c)
{
    int fd;
    uint8_t msgbuf[CMSG_SPACE(sizeof(fd))];
    struct msghdr msg = {
        .msg_control = msgbuf,
        /*
         * Offer room for exactly one descriptor, so that a peer sending
         * several cannot install extra ones that would then leak.
         */
        .msg_controllen = CMSG_LEN(sizeof(fd)),
    };
    struct cmsghdr *cmsg;
    struct iovec iov;
    uint8_t req[1];
    ssize_t len;

    memset(msgbuf, 0, sizeof(msgbuf));

    iov.iov_base = req;
    iov.iov_len = sizeof(req);

    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;

    len = recvmsg(c, &msg, 0);
    if (len < 0) {
        return -1;
    }
    if (len == 0) {
        /* EOF: must not be reported as descriptor 0. */
        errno = ENOMSG;
        return -1;
    }

    /* Only trust the control data if the kernel actually filled it in. */
    cmsg = CMSG_FIRSTHDR(&msg);
    if (cmsg == NULL ||
        cmsg->cmsg_level != SOL_SOCKET ||
        cmsg->cmsg_type != SCM_RIGHTS ||
        cmsg->cmsg_len != CMSG_LEN(sizeof(fd))) {
        errno = ENOMSG;
        return -1;
    }

    memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
    return fd;
}
424
425 static int net_bridge_run_helper(const char *helper, const char *bridge)
426 {
427 sigset_t oldmask, mask;
428 int pid, status;
429 char *args[5];
430 char **parg;
431 int sv[2];
432
433 sigemptyset(&mask);
434 sigaddset(&mask, SIGCHLD);
435 sigprocmask(SIG_BLOCK, &mask, &oldmask);
436
437 if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
438 return -1;
439 }
440
441 /* try to launch bridge helper */
442 pid = fork();
443 if (pid == 0) {
444 int open_max = sysconf(_SC_OPEN_MAX), i;
445 char fd_buf[6+10];
446 char br_buf[6+IFNAMSIZ] = {0};
447 char helper_cmd[PATH_MAX + sizeof(fd_buf) + sizeof(br_buf) + 15];
448
449 for (i = 0; i < open_max; i++) {
450 if (i != STDIN_FILENO &&
451 i != STDOUT_FILENO &&
452 i != STDERR_FILENO &&
453 i != sv[1]) {
454 close(i);
455 }
456 }
457
458 snprintf(fd_buf, sizeof(fd_buf), "%s%d", "--fd=", sv[1]);
459
460 if (strrchr(helper, ' ') || strrchr(helper, '\t')) {
461 /* assume helper is a command */
462
463 if (strstr(helper, "--br=") == NULL) {
464 snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);
465 }
466
467 snprintf(helper_cmd, sizeof(helper_cmd), "%s %s %s %s",
468 helper, "--use-vnet", fd_buf, br_buf);
469
470 parg = args;
471 *parg++ = (char *)"sh";
472 *parg++ = (char *)"-c";
473 *parg++ = helper_cmd;
474 *parg++ = NULL;
475
476 execv("/bin/sh", args);
477 } else {
478 /* assume helper is just the executable path name */
479
480 snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);
481
482 parg = args;
483 *parg++ = (char *)helper;
484 *parg++ = (char *)"--use-vnet";
485 *parg++ = fd_buf;
486 *parg++ = br_buf;
487 *parg++ = NULL;
488
489 execv(helper, args);
490 }
491 _exit(1);
492
493 } else if (pid > 0) {
494 int fd;
495
496 close(sv[1]);
497
498 do {
499 fd = recv_fd(sv[0]);
500 } while (fd == -1 && errno == EINTR);
501
502 close(sv[0]);
503
504 while (waitpid(pid, &status, 0) != pid) {
505 /* loop */
506 }
507 sigprocmask(SIG_SETMASK, &oldmask, NULL);
508 if (fd < 0) {
509 fprintf(stderr, "failed to recv file descriptor\n");
510 return -1;
511 }
512
513 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
514 return fd;
515 }
516 }
517 fprintf(stderr, "failed to launch bridge helper\n");
518 return -1;
519 }
520
521 int net_init_bridge(const NetClientOptions *opts, const char *name,
522 NetClientState *peer)
523 {
524 const NetdevBridgeOptions *bridge;
525 const char *helper, *br;
526
527 TAPState *s;
528 int fd, vnet_hdr;
529
530 assert(opts->kind == NET_CLIENT_OPTIONS_KIND_BRIDGE);
531 bridge = opts->bridge;
532
533 helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER;
534 br = bridge->has_br ? bridge->br : DEFAULT_BRIDGE_INTERFACE;
535
536 fd = net_bridge_run_helper(helper, br);
537 if (fd == -1) {
538 return -1;
539 }
540
541 fcntl(fd, F_SETFL, O_NONBLOCK);
542
543 vnet_hdr = tap_probe_vnet_hdr(fd);
544
545 s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);
546 if (!s) {
547 close(fd);
548 return -1;
549 }
550
551 snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper,
552 br);
553
554 return 0;
555 }
556
557 static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
558 const char *setup_script, char *ifname,
559 size_t ifname_sz)
560 {
561 int fd, vnet_hdr_required;
562
563 if (tap->has_ifname) {
564 pstrcpy(ifname, ifname_sz, tap->ifname);
565 } else {
566 assert(ifname_sz > 0);
567 ifname[0] = '\0';
568 }
569
570 if (tap->has_vnet_hdr) {
571 *vnet_hdr = tap->vnet_hdr;
572 vnet_hdr_required = *vnet_hdr;
573 } else {
574 *vnet_hdr = 1;
575 vnet_hdr_required = 0;
576 }
577
578 TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required));
579 if (fd < 0) {
580 return -1;
581 }
582
583 if (setup_script &&
584 setup_script[0] != '\0' &&
585 strcmp(setup_script, "no") != 0 &&
586 launch_script(setup_script, ifname, fd)) {
587 close(fd);
588 return -1;
589 }
590
591 return fd;
592 }
593
594 int net_init_tap(const NetClientOptions *opts, const char *name,
595 NetClientState *peer)
596 {
597 const NetdevTapOptions *tap;
598
599 int fd, vnet_hdr = 0;
600 const char *model;
601 TAPState *s;
602
603 /* for the no-fd, no-helper case */
604 const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */
605 char ifname[128];
606
607 assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP);
608 tap = opts->tap;
609
610 if (tap->has_fd) {
611 if (tap->has_ifname || tap->has_script || tap->has_downscript ||
612 tap->has_vnet_hdr || tap->has_helper) {
613 error_report("ifname=, script=, downscript=, vnet_hdr=, "
614 "and helper= are invalid with fd=");
615 return -1;
616 }
617
618 fd = monitor_handle_fd_param(cur_mon, tap->fd);
619 if (fd == -1) {
620 return -1;
621 }
622
623 fcntl(fd, F_SETFL, O_NONBLOCK);
624
625 vnet_hdr = tap_probe_vnet_hdr(fd);
626
627 model = "tap";
628
629 } else if (tap->has_helper) {
630 if (tap->has_ifname || tap->has_script || tap->has_downscript ||
631 tap->has_vnet_hdr) {
632 error_report("ifname=, script=, downscript=, and vnet_hdr= "
633 "are invalid with helper=");
634 return -1;
635 }
636
637 fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE);
638 if (fd == -1) {
639 return -1;
640 }
641
642 fcntl(fd, F_SETFL, O_NONBLOCK);
643
644 vnet_hdr = tap_probe_vnet_hdr(fd);
645
646 model = "bridge";
647
648 } else {
649 script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT;
650 fd = net_tap_init(tap, &vnet_hdr, script, ifname, sizeof ifname);
651 if (fd == -1) {
652 return -1;
653 }
654
655 model = "tap";
656 }
657
658 s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
659 if (!s) {
660 close(fd);
661 return -1;
662 }
663
664 if (tap_set_sndbuf(s->fd, tap) < 0) {
665 return -1;
666 }
667
668 if (tap->has_fd) {
669 snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
670 } else if (tap->has_helper) {
671 snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s",
672 tap->helper);
673 } else {
674 const char *downscript;
675
676 downscript = tap->has_downscript ? tap->downscript :
677 DEFAULT_NETWORK_DOWN_SCRIPT;
678
679 snprintf(s->nc.info_str, sizeof(s->nc.info_str),
680 "ifname=%s,script=%s,downscript=%s", ifname, script,
681 downscript);
682
683 if (strcmp(downscript, "no") != 0) {
684 snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
685 snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname);
686 }
687 }
688
689 if (tap->has_vhost ? tap->vhost :
690 tap->has_vhostfd || (tap->has_vhostforce && tap->vhostforce)) {
691 int vhostfd;
692
693 if (tap->has_vhostfd) {
694 vhostfd = monitor_handle_fd_param(cur_mon, tap->vhostfd);
695 if (vhostfd == -1) {
696 return -1;
697 }
698 } else {
699 vhostfd = -1;
700 }
701
702 s->vhost_net = vhost_net_init(&s->nc, vhostfd,
703 tap->has_vhostforce && tap->vhostforce);
704 if (!s->vhost_net) {
705 error_report("vhost-net requested but could not be initialized");
706 return -1;
707 }
708 } else if (tap->has_vhostfd) {
709 error_report("vhostfd= is not valid without vhost");
710 return -1;
711 }
712
713 return 0;
714 }
715
716 VHostNetState *tap_get_vhost_net(NetClientState *nc)
717 {
718 TAPState *s = DO_UPCAST(TAPState, nc, nc);
719 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
720 return s->vhost_net;
721 }