4 * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 #include <sys/types.h>
42 #include <sys/socket.h>
44 #include <sys/queue.h>
53 #include "vhost_user.h"
56 * Every time rte_vhost_driver_register() is invoked, an associated
57 * vhost_user_socket struct will be created.
59 struct vhost_user_socket
{
65 bool dequeue_zero_copy
;
68 struct vhost_user_connection
{
69 struct vhost_user_socket
*vsocket
;
73 #define MAX_VHOST_SOCKET 1024
75 struct vhost_user_socket
*vsockets
[MAX_VHOST_SOCKET
];
78 pthread_mutex_t mutex
;
81 #define MAX_VIRTIO_BACKLOG 128
83 static void vhost_user_server_new_connection(int fd
, void *data
, int *remove
);
84 static void vhost_user_read_cb(int fd
, void *dat
, int *remove
);
85 static int vhost_user_create_client(struct vhost_user_socket
*vsocket
);
87 static struct vhost_user vhost_user
= {
89 .fd
= { [0 ... MAX_FDS
- 1] = {-1, NULL
, NULL
, NULL
, 0} },
90 .fd_mutex
= PTHREAD_MUTEX_INITIALIZER
,
94 .mutex
= PTHREAD_MUTEX_INITIALIZER
,
97 /* Return the number of bytes read on success, or a negative value on failure. */
99 read_fd_message(int sockfd
, char *buf
, int buflen
, int *fds
, int fd_num
)
103 size_t fdsize
= fd_num
* sizeof(int);
104 char control
[CMSG_SPACE(fdsize
)];
105 struct cmsghdr
*cmsg
;
108 memset(&msgh
, 0, sizeof(msgh
));
110 iov
.iov_len
= buflen
;
114 msgh
.msg_control
= control
;
115 msgh
.msg_controllen
= sizeof(control
);
117 ret
= recvmsg(sockfd
, &msgh
, 0);
119 RTE_LOG(ERR
, VHOST_CONFIG
, "recvmsg failed\n");
123 if (msgh
.msg_flags
& (MSG_TRUNC
| MSG_CTRUNC
)) {
124 RTE_LOG(ERR
, VHOST_CONFIG
, "truncted msg\n");
128 for (cmsg
= CMSG_FIRSTHDR(&msgh
); cmsg
!= NULL
;
129 cmsg
= CMSG_NXTHDR(&msgh
, cmsg
)) {
130 if ((cmsg
->cmsg_level
== SOL_SOCKET
) &&
131 (cmsg
->cmsg_type
== SCM_RIGHTS
)) {
132 memcpy(fds
, CMSG_DATA(cmsg
), fdsize
);
141 send_fd_message(int sockfd
, char *buf
, int buflen
, int *fds
, int fd_num
)
146 size_t fdsize
= fd_num
* sizeof(int);
147 char control
[CMSG_SPACE(fdsize
)];
148 struct cmsghdr
*cmsg
;
151 memset(&msgh
, 0, sizeof(msgh
));
153 iov
.iov_len
= buflen
;
158 if (fds
&& fd_num
> 0) {
159 msgh
.msg_control
= control
;
160 msgh
.msg_controllen
= sizeof(control
);
161 cmsg
= CMSG_FIRSTHDR(&msgh
);
163 RTE_LOG(ERR
, VHOST_CONFIG
, "null cmsg\n");
166 cmsg
->cmsg_len
= CMSG_LEN(fdsize
);
167 cmsg
->cmsg_level
= SOL_SOCKET
;
168 cmsg
->cmsg_type
= SCM_RIGHTS
;
169 memcpy(CMSG_DATA(cmsg
), fds
, fdsize
);
171 msgh
.msg_control
= NULL
;
172 msgh
.msg_controllen
= 0;
176 ret
= sendmsg(sockfd
, &msgh
, 0);
177 } while (ret
< 0 && errno
== EINTR
);
180 RTE_LOG(ERR
, VHOST_CONFIG
, "sendmsg error\n");
188 vhost_user_add_connection(int fd
, struct vhost_user_socket
*vsocket
)
192 struct vhost_user_connection
*conn
;
195 conn
= malloc(sizeof(*conn
));
201 vid
= vhost_new_device();
208 size
= strnlen(vsocket
->path
, PATH_MAX
);
209 vhost_set_ifname(vid
, vsocket
->path
, size
);
211 if (vsocket
->dequeue_zero_copy
)
212 vhost_enable_dequeue_zero_copy(vid
);
214 RTE_LOG(INFO
, VHOST_CONFIG
, "new device, handle is %d\n", vid
);
216 vsocket
->connfd
= fd
;
217 conn
->vsocket
= vsocket
;
219 ret
= fdset_add(&vhost_user
.fdset
, fd
, vhost_user_read_cb
,
222 vsocket
->connfd
= -1;
225 RTE_LOG(ERR
, VHOST_CONFIG
,
226 "failed to add fd %d into vhost server fdset\n",
231 /* Callback invoked when there is a new vhost-user connection from a client. */
233 vhost_user_server_new_connection(int fd
, void *dat
, int *remove __rte_unused
)
235 struct vhost_user_socket
*vsocket
= dat
;
237 fd
= accept(fd
, NULL
, NULL
);
241 RTE_LOG(INFO
, VHOST_CONFIG
, "new vhost user connection is %d\n", fd
);
242 vhost_user_add_connection(fd
, vsocket
);
246 vhost_user_read_cb(int connfd
, void *dat
, int *remove
)
248 struct vhost_user_connection
*conn
= dat
;
249 struct vhost_user_socket
*vsocket
= conn
->vsocket
;
252 ret
= vhost_user_msg_handler(conn
->vid
, connfd
);
254 vsocket
->connfd
= -1;
257 vhost_destroy_device(conn
->vid
);
260 if (vsocket
->reconnect
)
261 vhost_user_create_client(vsocket
);
266 create_unix_socket(const char *path
, struct sockaddr_un
*un
, bool is_server
)
270 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
273 RTE_LOG(INFO
, VHOST_CONFIG
, "vhost-user %s: socket created, fd: %d\n",
274 is_server
? "server" : "client", fd
);
276 if (!is_server
&& fcntl(fd
, F_SETFL
, O_NONBLOCK
)) {
277 RTE_LOG(ERR
, VHOST_CONFIG
,
278 "vhost-user: can't set nonblocking mode for socket, fd: "
279 "%d (%s)\n", fd
, strerror(errno
));
284 memset(un
, 0, sizeof(*un
));
285 un
->sun_family
= AF_UNIX
;
286 strncpy(un
->sun_path
, path
, sizeof(un
->sun_path
));
287 un
->sun_path
[sizeof(un
->sun_path
) - 1] = '\0';
293 vhost_user_create_server(struct vhost_user_socket
*vsocket
)
297 struct sockaddr_un un
;
298 const char *path
= vsocket
->path
;
300 fd
= create_unix_socket(path
, &un
, vsocket
->is_server
);
304 ret
= bind(fd
, (struct sockaddr
*)&un
, sizeof(un
));
306 RTE_LOG(ERR
, VHOST_CONFIG
,
307 "failed to bind to %s: %s; remove it and try again\n",
308 path
, strerror(errno
));
311 RTE_LOG(INFO
, VHOST_CONFIG
, "bind to %s\n", path
);
313 ret
= listen(fd
, MAX_VIRTIO_BACKLOG
);
317 vsocket
->listenfd
= fd
;
318 ret
= fdset_add(&vhost_user
.fdset
, fd
, vhost_user_server_new_connection
,
321 RTE_LOG(ERR
, VHOST_CONFIG
,
322 "failed to add listen fd %d to vhost server fdset\n",
334 struct vhost_user_reconnect
{
335 struct sockaddr_un un
;
337 struct vhost_user_socket
*vsocket
;
339 TAILQ_ENTRY(vhost_user_reconnect
) next
;
342 TAILQ_HEAD(vhost_user_reconnect_tailq_list
, vhost_user_reconnect
);
343 struct vhost_user_reconnect_list
{
344 struct vhost_user_reconnect_tailq_list head
;
345 pthread_mutex_t mutex
;
348 static struct vhost_user_reconnect_list reconn_list
;
349 static pthread_t reconn_tid
;
352 vhost_user_connect_nonblock(int fd
, struct sockaddr
*un
, size_t sz
)
356 ret
= connect(fd
, un
, sz
);
357 if (ret
< 0 && errno
!= EISCONN
)
360 flags
= fcntl(fd
, F_GETFL
, 0);
362 RTE_LOG(ERR
, VHOST_CONFIG
,
363 "can't get flags for connfd %d\n", fd
);
366 if ((flags
& O_NONBLOCK
) && fcntl(fd
, F_SETFL
, flags
& ~O_NONBLOCK
)) {
367 RTE_LOG(ERR
, VHOST_CONFIG
,
368 "can't disable nonblocking on fd %d\n", fd
);
375 vhost_user_client_reconnect(void *arg __rte_unused
)
378 struct vhost_user_reconnect
*reconn
, *next
;
381 pthread_mutex_lock(&reconn_list
.mutex
);
384 * An equivalent of TAILQ_FOREACH_SAFE,
385 * which is not available on all platforms.
387 for (reconn
= TAILQ_FIRST(&reconn_list
.head
);
388 reconn
!= NULL
; reconn
= next
) {
389 next
= TAILQ_NEXT(reconn
, next
);
391 ret
= vhost_user_connect_nonblock(reconn
->fd
,
392 (struct sockaddr
*)&reconn
->un
,
396 RTE_LOG(ERR
, VHOST_CONFIG
,
397 "reconnection for fd %d failed\n",
404 RTE_LOG(INFO
, VHOST_CONFIG
,
405 "%s: connected\n", reconn
->vsocket
->path
);
406 vhost_user_add_connection(reconn
->fd
, reconn
->vsocket
);
408 TAILQ_REMOVE(&reconn_list
.head
, reconn
, next
);
412 pthread_mutex_unlock(&reconn_list
.mutex
);
420 vhost_user_reconnect_init(void)
424 pthread_mutex_init(&reconn_list
.mutex
, NULL
);
425 TAILQ_INIT(&reconn_list
.head
);
427 ret
= pthread_create(&reconn_tid
, NULL
,
428 vhost_user_client_reconnect
, NULL
);
430 RTE_LOG(ERR
, VHOST_CONFIG
, "failed to create reconnect thread");
436 vhost_user_create_client(struct vhost_user_socket
*vsocket
)
440 struct sockaddr_un un
;
441 const char *path
= vsocket
->path
;
442 struct vhost_user_reconnect
*reconn
;
444 fd
= create_unix_socket(path
, &un
, vsocket
->is_server
);
448 ret
= vhost_user_connect_nonblock(fd
, (struct sockaddr
*)&un
,
451 vhost_user_add_connection(fd
, vsocket
);
455 RTE_LOG(ERR
, VHOST_CONFIG
,
456 "failed to connect to %s: %s\n",
457 path
, strerror(errno
));
459 if (ret
== -2 || !vsocket
->reconnect
) {
464 RTE_LOG(ERR
, VHOST_CONFIG
, "%s: reconnecting...\n", path
);
465 reconn
= malloc(sizeof(*reconn
));
466 if (reconn
== NULL
) {
467 RTE_LOG(ERR
, VHOST_CONFIG
,
468 "failed to allocate memory for reconnect\n");
474 reconn
->vsocket
= vsocket
;
475 pthread_mutex_lock(&reconn_list
.mutex
);
476 TAILQ_INSERT_TAIL(&reconn_list
.head
, reconn
, next
);
477 pthread_mutex_unlock(&reconn_list
.mutex
);
483 * Register a new vhost-user socket; here we act as a server
484 * (the default case), or as a client when the RTE_VHOST_USER_CLIENT flag
488 rte_vhost_driver_register(const char *path
, uint64_t flags
)
491 struct vhost_user_socket
*vsocket
;
496 pthread_mutex_lock(&vhost_user
.mutex
);
498 if (vhost_user
.vsocket_cnt
== MAX_VHOST_SOCKET
) {
499 RTE_LOG(ERR
, VHOST_CONFIG
,
500 "error: the number of vhost sockets reaches maximum\n");
504 vsocket
= malloc(sizeof(struct vhost_user_socket
));
507 memset(vsocket
, 0, sizeof(struct vhost_user_socket
));
508 vsocket
->path
= strdup(path
);
509 vsocket
->connfd
= -1;
510 vsocket
->dequeue_zero_copy
= flags
& RTE_VHOST_USER_DEQUEUE_ZERO_COPY
;
512 if ((flags
& RTE_VHOST_USER_CLIENT
) != 0) {
513 vsocket
->reconnect
= !(flags
& RTE_VHOST_USER_NO_RECONNECT
);
514 if (vsocket
->reconnect
&& reconn_tid
== 0) {
515 if (vhost_user_reconnect_init() < 0) {
521 ret
= vhost_user_create_client(vsocket
);
523 vsocket
->is_server
= true;
524 ret
= vhost_user_create_server(vsocket
);
532 vhost_user
.vsockets
[vhost_user
.vsocket_cnt
++] = vsocket
;
535 pthread_mutex_unlock(&vhost_user
.mutex
);
541 vhost_user_remove_reconnect(struct vhost_user_socket
*vsocket
)
544 struct vhost_user_reconnect
*reconn
, *next
;
546 pthread_mutex_lock(&reconn_list
.mutex
);
548 for (reconn
= TAILQ_FIRST(&reconn_list
.head
);
549 reconn
!= NULL
; reconn
= next
) {
550 next
= TAILQ_NEXT(reconn
, next
);
552 if (reconn
->vsocket
== vsocket
) {
553 TAILQ_REMOVE(&reconn_list
.head
, reconn
, next
);
560 pthread_mutex_unlock(&reconn_list
.mutex
);
565 * Unregister the specified vhost socket
568 rte_vhost_driver_unregister(const char *path
)
572 struct vhost_user_connection
*conn
;
574 pthread_mutex_lock(&vhost_user
.mutex
);
576 for (i
= 0; i
< vhost_user
.vsocket_cnt
; i
++) {
577 struct vhost_user_socket
*vsocket
= vhost_user
.vsockets
[i
];
579 if (!strcmp(vsocket
->path
, path
)) {
580 if (vsocket
->is_server
) {
581 fdset_del(&vhost_user
.fdset
, vsocket
->listenfd
);
582 close(vsocket
->listenfd
);
584 } else if (vsocket
->reconnect
) {
585 vhost_user_remove_reconnect(vsocket
);
588 conn
= fdset_del(&vhost_user
.fdset
, vsocket
->connfd
);
590 RTE_LOG(INFO
, VHOST_CONFIG
,
591 "free connfd = %d for device '%s'\n",
592 vsocket
->connfd
, path
);
593 close(vsocket
->connfd
);
594 vhost_destroy_device(conn
->vid
);
601 count
= --vhost_user
.vsocket_cnt
;
602 vhost_user
.vsockets
[i
] = vhost_user
.vsockets
[count
];
603 vhost_user
.vsockets
[count
] = NULL
;
604 pthread_mutex_unlock(&vhost_user
.mutex
);
609 pthread_mutex_unlock(&vhost_user
.mutex
);
615 rte_vhost_driver_session_start(void)
617 fdset_event_dispatch(&vhost_user
.fdset
);