4 * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 #include <sys/types.h>
42 #include <sys/socket.h>
44 #include <sys/queue.h>
53 #include "vhost_user.h"
/* List head type for the struct vhost_user_connection entries kept per socket. */
TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);
59 * Every time rte_vhost_driver_register() is invoked, an associated
60 * vhost_user_socket struct will be created.
62 struct vhost_user_socket
{
63 struct vhost_user_connection_list conn_list
;
64 pthread_mutex_t conn_mutex
;
67 struct sockaddr_un un
;
70 bool dequeue_zero_copy
;
73 * The "supported_features" indicates the feature bits the
74 * vhost driver supports. The "features" indicates the feature
75 * bits after the rte_vhost_driver_features_disable/enable().
76 * It is also the final feature bits used for vhost-user
77 * features negotiation.
79 uint64_t supported_features
;
82 struct vhost_device_ops
const *notify_ops
;
/* One established vhost-user connection belonging to a vhost_user_socket. */
struct vhost_user_connection {
	struct vhost_user_socket *vsocket;	/* socket this connection came in on */
	int connfd;	/* connection fd; set to -1 once it has been closed */
	int vid;	/* device id returned by vhost_new_device() */

	TAILQ_ENTRY(vhost_user_connection) next;
};
93 #define MAX_VHOST_SOCKET 1024
95 struct vhost_user_socket
*vsockets
[MAX_VHOST_SOCKET
];
98 pthread_mutex_t mutex
;
/* Backlog value handed to listen() when a socket is started in server mode. */
#define MAX_VIRTIO_BACKLOG 128
/*
 * Forward declarations: the fd callbacks and the socket helpers reference
 * one another, so they are declared before their definitions below.
 */
static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_read_cb(int fd, void *dat, int *remove);
static int create_unix_socket(struct vhost_user_socket *vsocket);
static int vhost_user_start_client(struct vhost_user_socket *vsocket);
108 static struct vhost_user vhost_user
= {
110 .fd
= { [0 ... MAX_FDS
- 1] = {-1, NULL
, NULL
, NULL
, 0} },
111 .fd_mutex
= PTHREAD_MUTEX_INITIALIZER
,
115 .mutex
= PTHREAD_MUTEX_INITIALIZER
,
118 /* return bytes# of read on success or negative val on failure. */
120 read_fd_message(int sockfd
, char *buf
, int buflen
, int *fds
, int fd_num
)
124 size_t fdsize
= fd_num
* sizeof(int);
125 char control
[CMSG_SPACE(fdsize
)];
126 struct cmsghdr
*cmsg
;
129 memset(&msgh
, 0, sizeof(msgh
));
131 iov
.iov_len
= buflen
;
135 msgh
.msg_control
= control
;
136 msgh
.msg_controllen
= sizeof(control
);
138 ret
= recvmsg(sockfd
, &msgh
, 0);
141 RTE_LOG(ERR
, VHOST_CONFIG
, "recvmsg failed, %s\n", strerror(errno
));
143 RTE_LOG(INFO
, VHOST_CONFIG
, "peer closed\n");
147 if (msgh
.msg_flags
& (MSG_TRUNC
| MSG_CTRUNC
)) {
148 RTE_LOG(ERR
, VHOST_CONFIG
, "truncted msg\n");
152 for (cmsg
= CMSG_FIRSTHDR(&msgh
); cmsg
!= NULL
;
153 cmsg
= CMSG_NXTHDR(&msgh
, cmsg
)) {
154 if ((cmsg
->cmsg_level
== SOL_SOCKET
) &&
155 (cmsg
->cmsg_type
== SCM_RIGHTS
)) {
156 memcpy(fds
, CMSG_DATA(cmsg
), fdsize
);
165 send_fd_message(int sockfd
, char *buf
, int buflen
, int *fds
, int fd_num
)
170 size_t fdsize
= fd_num
* sizeof(int);
171 char control
[CMSG_SPACE(fdsize
)];
172 struct cmsghdr
*cmsg
;
175 memset(&msgh
, 0, sizeof(msgh
));
177 iov
.iov_len
= buflen
;
182 if (fds
&& fd_num
> 0) {
183 msgh
.msg_control
= control
;
184 msgh
.msg_controllen
= sizeof(control
);
185 cmsg
= CMSG_FIRSTHDR(&msgh
);
187 RTE_LOG(ERR
, VHOST_CONFIG
, "cmsg == NULL\n");
191 cmsg
->cmsg_len
= CMSG_LEN(fdsize
);
192 cmsg
->cmsg_level
= SOL_SOCKET
;
193 cmsg
->cmsg_type
= SCM_RIGHTS
;
194 memcpy(CMSG_DATA(cmsg
), fds
, fdsize
);
196 msgh
.msg_control
= NULL
;
197 msgh
.msg_controllen
= 0;
201 ret
= sendmsg(sockfd
, &msgh
, 0);
202 } while (ret
< 0 && errno
== EINTR
);
205 RTE_LOG(ERR
, VHOST_CONFIG
, "sendmsg error\n");
213 vhost_user_add_connection(int fd
, struct vhost_user_socket
*vsocket
)
217 struct vhost_user_connection
*conn
;
220 conn
= malloc(sizeof(*conn
));
226 vid
= vhost_new_device(vsocket
->features
, vsocket
->notify_ops
);
231 size
= strnlen(vsocket
->path
, PATH_MAX
);
232 vhost_set_ifname(vid
, vsocket
->path
, size
);
234 if (vsocket
->dequeue_zero_copy
)
235 vhost_enable_dequeue_zero_copy(vid
);
237 RTE_LOG(INFO
, VHOST_CONFIG
, "new device, handle is %d\n", vid
);
239 if (vsocket
->notify_ops
->new_connection
) {
240 ret
= vsocket
->notify_ops
->new_connection(vid
);
242 RTE_LOG(ERR
, VHOST_CONFIG
,
243 "failed to add vhost user connection with fd %d\n",
250 conn
->vsocket
= vsocket
;
252 ret
= fdset_add(&vhost_user
.fdset
, fd
, vhost_user_read_cb
,
255 RTE_LOG(ERR
, VHOST_CONFIG
,
256 "failed to add fd %d into vhost server fdset\n",
259 if (vsocket
->notify_ops
->destroy_connection
)
260 vsocket
->notify_ops
->destroy_connection(conn
->vid
);
265 pthread_mutex_lock(&vsocket
->conn_mutex
);
266 TAILQ_INSERT_TAIL(&vsocket
->conn_list
, conn
, next
);
267 pthread_mutex_unlock(&vsocket
->conn_mutex
);
275 /* call back when there is new vhost-user connection from client */
277 vhost_user_server_new_connection(int fd
, void *dat
, int *remove __rte_unused
)
279 struct vhost_user_socket
*vsocket
= dat
;
281 fd
= accept(fd
, NULL
, NULL
);
285 RTE_LOG(INFO
, VHOST_CONFIG
, "new vhost user connection is %d\n", fd
);
286 vhost_user_add_connection(fd
, vsocket
);
290 vhost_user_read_cb(int connfd
, void *dat
, int *remove
)
292 struct vhost_user_connection
*conn
= dat
;
293 struct vhost_user_socket
*vsocket
= conn
->vsocket
;
296 ret
= vhost_user_msg_handler(conn
->vid
, connfd
);
299 vhost_destroy_device(conn
->vid
);
301 if (vsocket
->notify_ops
->destroy_connection
)
302 vsocket
->notify_ops
->destroy_connection(conn
->vid
);
304 pthread_mutex_lock(&vsocket
->conn_mutex
);
305 TAILQ_REMOVE(&vsocket
->conn_list
, conn
, next
);
306 if (conn
->connfd
!= -1) {
310 pthread_mutex_unlock(&vsocket
->conn_mutex
);
314 if (vsocket
->reconnect
) {
315 create_unix_socket(vsocket
);
316 vhost_user_start_client(vsocket
);
322 create_unix_socket(struct vhost_user_socket
*vsocket
)
325 struct sockaddr_un
*un
= &vsocket
->un
;
327 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
330 RTE_LOG(INFO
, VHOST_CONFIG
, "vhost-user %s: socket created, fd: %d\n",
331 vsocket
->is_server
? "server" : "client", fd
);
333 if (!vsocket
->is_server
&& fcntl(fd
, F_SETFL
, O_NONBLOCK
)) {
334 RTE_LOG(ERR
, VHOST_CONFIG
,
335 "vhost-user: can't set nonblocking mode for socket, fd: "
336 "%d (%s)\n", fd
, strerror(errno
));
341 memset(un
, 0, sizeof(*un
));
342 un
->sun_family
= AF_UNIX
;
343 strncpy(un
->sun_path
, vsocket
->path
, sizeof(un
->sun_path
));
344 un
->sun_path
[sizeof(un
->sun_path
) - 1] = '\0';
346 vsocket
->socket_fd
= fd
;
351 vhost_user_start_server(struct vhost_user_socket
*vsocket
)
354 int fd
= vsocket
->socket_fd
;
355 const char *path
= vsocket
->path
;
357 ret
= bind(fd
, (struct sockaddr
*)&vsocket
->un
, sizeof(vsocket
->un
));
359 RTE_LOG(ERR
, VHOST_CONFIG
,
360 "failed to bind to %s: %s; remove it and try again\n",
361 path
, strerror(errno
));
364 RTE_LOG(INFO
, VHOST_CONFIG
, "bind to %s\n", path
);
366 ret
= listen(fd
, MAX_VIRTIO_BACKLOG
);
370 ret
= fdset_add(&vhost_user
.fdset
, fd
, vhost_user_server_new_connection
,
373 RTE_LOG(ERR
, VHOST_CONFIG
,
374 "failed to add listen fd %d to vhost server fdset\n",
/* A pending client connection attempt that the reconnect thread retries. */
struct vhost_user_reconnect {
	struct sockaddr_un un;			/* address to connect to */
	int fd;					/* socket used for the attempt */
	struct vhost_user_socket *vsocket;	/* socket the attempt is for */

	TAILQ_ENTRY(vhost_user_reconnect) next;
};
/* List head type for queued struct vhost_user_reconnect entries. */
TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
395 struct vhost_user_reconnect_list
{
396 struct vhost_user_reconnect_tailq_list head
;
397 pthread_mutex_t mutex
;
/* Pending reconnect attempts; set up by vhost_user_reconnect_init(). */
static struct vhost_user_reconnect_list reconn_list;
/* Reconnect thread id; compared against 0 to tell whether it was started. */
static pthread_t reconn_tid;
404 vhost_user_connect_nonblock(int fd
, struct sockaddr
*un
, size_t sz
)
408 ret
= connect(fd
, un
, sz
);
409 if (ret
< 0 && errno
!= EISCONN
)
412 flags
= fcntl(fd
, F_GETFL
, 0);
414 RTE_LOG(ERR
, VHOST_CONFIG
,
415 "can't get flags for connfd %d\n", fd
);
418 if ((flags
& O_NONBLOCK
) && fcntl(fd
, F_SETFL
, flags
& ~O_NONBLOCK
)) {
419 RTE_LOG(ERR
, VHOST_CONFIG
,
420 "can't disable nonblocking on fd %d\n", fd
);
427 vhost_user_client_reconnect(void *arg __rte_unused
)
430 struct vhost_user_reconnect
*reconn
, *next
;
433 pthread_mutex_lock(&reconn_list
.mutex
);
436 * An equal implementation of TAILQ_FOREACH_SAFE,
437 * which does not exist on all platforms.
439 for (reconn
= TAILQ_FIRST(&reconn_list
.head
);
440 reconn
!= NULL
; reconn
= next
) {
441 next
= TAILQ_NEXT(reconn
, next
);
443 ret
= vhost_user_connect_nonblock(reconn
->fd
,
444 (struct sockaddr
*)&reconn
->un
,
448 RTE_LOG(ERR
, VHOST_CONFIG
,
449 "reconnection for fd %d failed\n",
456 RTE_LOG(INFO
, VHOST_CONFIG
,
457 "%s: connected\n", reconn
->vsocket
->path
);
458 vhost_user_add_connection(reconn
->fd
, reconn
->vsocket
);
460 TAILQ_REMOVE(&reconn_list
.head
, reconn
, next
);
464 pthread_mutex_unlock(&reconn_list
.mutex
);
472 vhost_user_reconnect_init(void)
476 pthread_mutex_init(&reconn_list
.mutex
, NULL
);
477 TAILQ_INIT(&reconn_list
.head
);
479 ret
= pthread_create(&reconn_tid
, NULL
,
480 vhost_user_client_reconnect
, NULL
);
482 RTE_LOG(ERR
, VHOST_CONFIG
, "failed to create reconnect thread");
488 vhost_user_start_client(struct vhost_user_socket
*vsocket
)
491 int fd
= vsocket
->socket_fd
;
492 const char *path
= vsocket
->path
;
493 struct vhost_user_reconnect
*reconn
;
495 ret
= vhost_user_connect_nonblock(fd
, (struct sockaddr
*)&vsocket
->un
,
496 sizeof(vsocket
->un
));
498 vhost_user_add_connection(fd
, vsocket
);
502 RTE_LOG(WARNING
, VHOST_CONFIG
,
503 "failed to connect to %s: %s\n",
504 path
, strerror(errno
));
506 if (ret
== -2 || !vsocket
->reconnect
) {
511 RTE_LOG(INFO
, VHOST_CONFIG
, "%s: reconnecting...\n", path
);
512 reconn
= malloc(sizeof(*reconn
));
513 if (reconn
== NULL
) {
514 RTE_LOG(ERR
, VHOST_CONFIG
,
515 "failed to allocate memory for reconnect\n");
519 reconn
->un
= vsocket
->un
;
521 reconn
->vsocket
= vsocket
;
522 pthread_mutex_lock(&reconn_list
.mutex
);
523 TAILQ_INSERT_TAIL(&reconn_list
.head
, reconn
, next
);
524 pthread_mutex_unlock(&reconn_list
.mutex
);
529 static struct vhost_user_socket
*
530 find_vhost_user_socket(const char *path
)
534 for (i
= 0; i
< vhost_user
.vsocket_cnt
; i
++) {
535 struct vhost_user_socket
*vsocket
= vhost_user
.vsockets
[i
];
537 if (!strcmp(vsocket
->path
, path
))
545 rte_vhost_driver_disable_features(const char *path
, uint64_t features
)
547 struct vhost_user_socket
*vsocket
;
549 pthread_mutex_lock(&vhost_user
.mutex
);
550 vsocket
= find_vhost_user_socket(path
);
552 vsocket
->features
&= ~features
;
553 pthread_mutex_unlock(&vhost_user
.mutex
);
555 return vsocket
? 0 : -1;
559 rte_vhost_driver_enable_features(const char *path
, uint64_t features
)
561 struct vhost_user_socket
*vsocket
;
563 pthread_mutex_lock(&vhost_user
.mutex
);
564 vsocket
= find_vhost_user_socket(path
);
566 if ((vsocket
->supported_features
& features
) != features
) {
568 * trying to enable features the driver doesn't
571 pthread_mutex_unlock(&vhost_user
.mutex
);
574 vsocket
->features
|= features
;
576 pthread_mutex_unlock(&vhost_user
.mutex
);
578 return vsocket
? 0 : -1;
582 rte_vhost_driver_set_features(const char *path
, uint64_t features
)
584 struct vhost_user_socket
*vsocket
;
586 pthread_mutex_lock(&vhost_user
.mutex
);
587 vsocket
= find_vhost_user_socket(path
);
589 vsocket
->supported_features
= features
;
590 vsocket
->features
= features
;
592 pthread_mutex_unlock(&vhost_user
.mutex
);
594 return vsocket
? 0 : -1;
598 rte_vhost_driver_get_features(const char *path
, uint64_t *features
)
600 struct vhost_user_socket
*vsocket
;
602 pthread_mutex_lock(&vhost_user
.mutex
);
603 vsocket
= find_vhost_user_socket(path
);
605 *features
= vsocket
->features
;
606 pthread_mutex_unlock(&vhost_user
.mutex
);
609 RTE_LOG(ERR
, VHOST_CONFIG
,
610 "socket file %s is not registered yet.\n", path
);
618 * Register a new vhost-user socket; here we could act as server
619 * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
623 rte_vhost_driver_register(const char *path
, uint64_t flags
)
626 struct vhost_user_socket
*vsocket
;
631 pthread_mutex_lock(&vhost_user
.mutex
);
633 if (vhost_user
.vsocket_cnt
== MAX_VHOST_SOCKET
) {
634 RTE_LOG(ERR
, VHOST_CONFIG
,
635 "error: the number of vhost sockets reaches maximum\n");
639 vsocket
= malloc(sizeof(struct vhost_user_socket
));
642 memset(vsocket
, 0, sizeof(struct vhost_user_socket
));
643 vsocket
->path
= strdup(path
);
644 if (!vsocket
->path
) {
648 TAILQ_INIT(&vsocket
->conn_list
);
649 vsocket
->dequeue_zero_copy
= flags
& RTE_VHOST_USER_DEQUEUE_ZERO_COPY
;
652 * Set the supported features correctly for the builtin vhost-user
655 * Applications know nothing about features the builtin virtio net
656 * driver (virtio_net.c) supports, thus it's not possible for them
657 * to invoke rte_vhost_driver_set_features(). To workaround it, here
658 * we set it unconditionally. If the application want to implement
659 * another vhost-user driver (say SCSI), it should call the
660 * rte_vhost_driver_set_features(), which will overwrite following
663 vsocket
->supported_features
= VIRTIO_NET_SUPPORTED_FEATURES
;
664 vsocket
->features
= VIRTIO_NET_SUPPORTED_FEATURES
;
666 if ((flags
& RTE_VHOST_USER_CLIENT
) != 0) {
667 vsocket
->reconnect
= !(flags
& RTE_VHOST_USER_NO_RECONNECT
);
668 if (vsocket
->reconnect
&& reconn_tid
== 0) {
669 if (vhost_user_reconnect_init() < 0) {
676 vsocket
->is_server
= true;
678 ret
= create_unix_socket(vsocket
);
685 pthread_mutex_init(&vsocket
->conn_mutex
, NULL
);
686 vhost_user
.vsockets
[vhost_user
.vsocket_cnt
++] = vsocket
;
689 pthread_mutex_unlock(&vhost_user
.mutex
);
695 vhost_user_remove_reconnect(struct vhost_user_socket
*vsocket
)
698 struct vhost_user_reconnect
*reconn
, *next
;
700 pthread_mutex_lock(&reconn_list
.mutex
);
702 for (reconn
= TAILQ_FIRST(&reconn_list
.head
);
703 reconn
!= NULL
; reconn
= next
) {
704 next
= TAILQ_NEXT(reconn
, next
);
706 if (reconn
->vsocket
== vsocket
) {
707 TAILQ_REMOVE(&reconn_list
.head
, reconn
, next
);
714 pthread_mutex_unlock(&reconn_list
.mutex
);
719 * Unregister the specified vhost socket
722 rte_vhost_driver_unregister(const char *path
)
726 struct vhost_user_connection
*conn
;
728 pthread_mutex_lock(&vhost_user
.mutex
);
730 for (i
= 0; i
< vhost_user
.vsocket_cnt
; i
++) {
731 struct vhost_user_socket
*vsocket
= vhost_user
.vsockets
[i
];
733 if (!strcmp(vsocket
->path
, path
)) {
734 if (vsocket
->is_server
) {
735 fdset_del(&vhost_user
.fdset
, vsocket
->socket_fd
);
736 close(vsocket
->socket_fd
);
738 } else if (vsocket
->reconnect
) {
739 vhost_user_remove_reconnect(vsocket
);
742 pthread_mutex_lock(&vsocket
->conn_mutex
);
743 TAILQ_FOREACH(conn
, &vsocket
->conn_list
, next
) {
747 pthread_mutex_unlock(&vsocket
->conn_mutex
);
750 pthread_mutex_lock(&vsocket
->conn_mutex
);
751 conn
= TAILQ_FIRST(&vsocket
->conn_list
);
752 pthread_mutex_unlock(&vsocket
->conn_mutex
);
753 } while (conn
!= NULL
);
758 count
= --vhost_user
.vsocket_cnt
;
759 vhost_user
.vsockets
[i
] = vhost_user
.vsockets
[count
];
760 vhost_user
.vsockets
[count
] = NULL
;
761 pthread_mutex_unlock(&vhost_user
.mutex
);
766 pthread_mutex_unlock(&vhost_user
.mutex
);
772 * Register ops so that we can add/remove device to data core.
775 rte_vhost_driver_callback_register(const char *path
,
776 struct vhost_device_ops
const * const ops
)
778 struct vhost_user_socket
*vsocket
;
780 pthread_mutex_lock(&vhost_user
.mutex
);
781 vsocket
= find_vhost_user_socket(path
);
783 vsocket
->notify_ops
= ops
;
784 pthread_mutex_unlock(&vhost_user
.mutex
);
786 return vsocket
? 0 : -1;
789 struct vhost_device_ops
const *
790 vhost_driver_callback_get(const char *path
)
792 struct vhost_user_socket
*vsocket
;
794 pthread_mutex_lock(&vhost_user
.mutex
);
795 vsocket
= find_vhost_user_socket(path
);
796 pthread_mutex_unlock(&vhost_user
.mutex
);
798 return vsocket
? vsocket
->notify_ops
: NULL
;
802 rte_vhost_driver_start(const char *path
)
804 struct vhost_user_socket
*vsocket
;
805 static pthread_t fdset_tid
;
807 pthread_mutex_lock(&vhost_user
.mutex
);
808 vsocket
= find_vhost_user_socket(path
);
809 pthread_mutex_unlock(&vhost_user
.mutex
);
814 if (fdset_tid
== 0) {
815 rte_cpuset_t orig_cpuset
;
816 rte_cpuset_t tmp_cpuset
;
820 CPU_ZERO(&tmp_cpuset
);
821 num_cores
= sysconf(_SC_NPROCESSORS_CONF
);
822 /* Create a mask containing all CPUs */
823 for (i
= 0; i
< num_cores
; i
++) {
824 CPU_SET(i
, &tmp_cpuset
);
827 rte_thread_get_affinity(&orig_cpuset
);
828 rte_thread_set_affinity(&tmp_cpuset
);
829 ret
= pthread_create(&fdset_tid
, NULL
, fdset_event_dispatch
,
831 rte_thread_set_affinity(&orig_cpuset
);
833 RTE_LOG(ERR
, VHOST_CONFIG
,
834 "failed to create fdset handling thread");
837 if (vsocket
->is_server
)
838 return vhost_user_start_server(vsocket
);
840 return vhost_user_start_client(vsocket
);