]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/dpdk/lib/librte_vhost/socket.c
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / spdk / dpdk / lib / librte_vhost / socket.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2016 Intel Corporation
3 */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <limits.h>
8 #include <stdlib.h>
9 #include <unistd.h>
10 #include <string.h>
11 #include <sys/types.h>
12 #include <sys/socket.h>
13 #include <sys/un.h>
14 #include <sys/queue.h>
15 #include <errno.h>
16 #include <fcntl.h>
17 #include <pthread.h>
18
19 #include <rte_log.h>
20
21 #include "fd_man.h"
22 #include "vhost.h"
23 #include "vhost_user.h"
24
25
/* Intrusive doubly-linked list of live connections hanging off a socket. */
TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);

/*
 * Every time rte_vhost_driver_register() is invoked, an associated
 * vhost_user_socket struct will be created.
 */
struct vhost_user_socket {
	struct vhost_user_connection_list conn_list; /* active connections */
	pthread_mutex_t conn_mutex;	/* guards conn_list */
	char *path;			/* heap copy of the unix socket path */
	int socket_fd;			/* listen fd (server) or connect fd (client) */
	struct sockaddr_un un;		/* cached bind/connect address */
	bool is_server;			/* server mode unless RTE_VHOST_USER_CLIENT */
	bool reconnect;			/* client mode: retry connect in background */
	bool dequeue_zero_copy;
	bool iommu_support;
	bool use_builtin_virtio_net;	/* false once an app sets its own features */

	/*
	 * The "supported_features" indicates the feature bits the
	 * vhost driver supports. The "features" indicates the feature
	 * bits after the rte_vhost_driver_features_disable/enable().
	 * It is also the final feature bits used for vhost-user
	 * features negotiation.
	 */
	uint64_t supported_features;
	uint64_t features;

	/*
	 * Device id to identify a specific backend device.
	 * It's set to -1 for the default software implementation.
	 * If valid, one socket can have 1 connection only.
	 */
	int vdpa_dev_id;

	struct vhost_device_ops const *notify_ops; /* app callbacks, may be NULL members */
};

/* One accepted (or connected) vhost-user session on a socket. */
struct vhost_user_connection {
	struct vhost_user_socket *vsocket; /* owning socket */
	int connfd;			   /* connected fd polled by the fdset */
	int vid;			   /* vhost device id from vhost_new_device() */

	TAILQ_ENTRY(vhost_user_connection) next;
};

#define MAX_VHOST_SOCKET 1024
/* Process-wide registry of all registered vhost-user sockets. */
struct vhost_user {
	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
	struct fdset fdset;	/* fds polled by the vhost-events thread */
	int vsocket_cnt;
	pthread_mutex_t mutex;	/* guards vsockets[] and vsocket_cnt */
};

/* Listen backlog for server-mode sockets. */
#define MAX_VIRTIO_BACKLOG 128

static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_read_cb(int fd, void *dat, int *remove);
static int create_unix_socket(struct vhost_user_socket *vsocket);
static int vhost_user_start_client(struct vhost_user_socket *vsocket);

/* The single global instance; all entries start empty / fd = -1. */
static struct vhost_user vhost_user = {
	.fdset = {
		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
		.num = 0
	},
	.vsocket_cnt = 0,
	.mutex = PTHREAD_MUTEX_INITIALIZER,
};
96
97 /* return bytes# of read on success or negative val on failure. */
98 int
99 read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
100 {
101 struct iovec iov;
102 struct msghdr msgh;
103 size_t fdsize = fd_num * sizeof(int);
104 char control[CMSG_SPACE(fdsize)];
105 struct cmsghdr *cmsg;
106 int got_fds = 0;
107 int ret;
108
109 memset(&msgh, 0, sizeof(msgh));
110 iov.iov_base = buf;
111 iov.iov_len = buflen;
112
113 msgh.msg_iov = &iov;
114 msgh.msg_iovlen = 1;
115 msgh.msg_control = control;
116 msgh.msg_controllen = sizeof(control);
117
118 ret = recvmsg(sockfd, &msgh, 0);
119 if (ret <= 0) {
120 RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
121 return ret;
122 }
123
124 if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
125 RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
126 return -1;
127 }
128
129 for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
130 cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
131 if ((cmsg->cmsg_level == SOL_SOCKET) &&
132 (cmsg->cmsg_type == SCM_RIGHTS)) {
133 got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
134 memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
135 break;
136 }
137 }
138
139 /* Clear out unused file descriptors */
140 while (got_fds < fd_num)
141 fds[got_fds++] = -1;
142
143 return ret;
144 }
145
146 int
147 send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
148 {
149
150 struct iovec iov;
151 struct msghdr msgh;
152 size_t fdsize = fd_num * sizeof(int);
153 char control[CMSG_SPACE(fdsize)];
154 struct cmsghdr *cmsg;
155 int ret;
156
157 memset(&msgh, 0, sizeof(msgh));
158 iov.iov_base = buf;
159 iov.iov_len = buflen;
160
161 msgh.msg_iov = &iov;
162 msgh.msg_iovlen = 1;
163
164 if (fds && fd_num > 0) {
165 msgh.msg_control = control;
166 msgh.msg_controllen = sizeof(control);
167 cmsg = CMSG_FIRSTHDR(&msgh);
168 if (cmsg == NULL) {
169 RTE_LOG(ERR, VHOST_CONFIG, "cmsg == NULL\n");
170 errno = EINVAL;
171 return -1;
172 }
173 cmsg->cmsg_len = CMSG_LEN(fdsize);
174 cmsg->cmsg_level = SOL_SOCKET;
175 cmsg->cmsg_type = SCM_RIGHTS;
176 memcpy(CMSG_DATA(cmsg), fds, fdsize);
177 } else {
178 msgh.msg_control = NULL;
179 msgh.msg_controllen = 0;
180 }
181
182 do {
183 ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
184 } while (ret < 0 && errno == EINTR);
185
186 if (ret < 0) {
187 RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
188 return ret;
189 }
190
191 return ret;
192 }
193
194 static void
195 vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
196 {
197 int vid;
198 size_t size;
199 struct vhost_user_connection *conn;
200 int ret;
201
202 if (vsocket == NULL)
203 return;
204
205 conn = malloc(sizeof(*conn));
206 if (conn == NULL) {
207 close(fd);
208 return;
209 }
210
211 vid = vhost_new_device();
212 if (vid == -1) {
213 goto err;
214 }
215
216 size = strnlen(vsocket->path, PATH_MAX);
217 vhost_set_ifname(vid, vsocket->path, size);
218
219 vhost_set_builtin_virtio_net(vid, vsocket->use_builtin_virtio_net);
220
221 vhost_attach_vdpa_device(vid, vsocket->vdpa_dev_id);
222
223 if (vsocket->dequeue_zero_copy)
224 vhost_enable_dequeue_zero_copy(vid);
225
226 RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
227
228 if (vsocket->notify_ops->new_connection) {
229 ret = vsocket->notify_ops->new_connection(vid);
230 if (ret < 0) {
231 RTE_LOG(ERR, VHOST_CONFIG,
232 "failed to add vhost user connection with fd %d\n",
233 fd);
234 goto err;
235 }
236 }
237
238 conn->connfd = fd;
239 conn->vsocket = vsocket;
240 conn->vid = vid;
241 ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
242 NULL, conn);
243 if (ret < 0) {
244 RTE_LOG(ERR, VHOST_CONFIG,
245 "failed to add fd %d into vhost server fdset\n",
246 fd);
247
248 if (vsocket->notify_ops->destroy_connection)
249 vsocket->notify_ops->destroy_connection(conn->vid);
250
251 goto err;
252 }
253
254 pthread_mutex_lock(&vsocket->conn_mutex);
255 TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
256 pthread_mutex_unlock(&vsocket->conn_mutex);
257
258 fdset_pipe_notify(&vhost_user.fdset);
259 return;
260
261 err:
262 free(conn);
263 close(fd);
264 }
265
266 /* call back when there is new vhost-user connection from client */
267 static void
268 vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
269 {
270 struct vhost_user_socket *vsocket = dat;
271
272 fd = accept(fd, NULL, NULL);
273 if (fd < 0)
274 return;
275
276 RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd);
277 vhost_user_add_connection(fd, vsocket);
278 }
279
280 static void
281 vhost_user_read_cb(int connfd, void *dat, int *remove)
282 {
283 struct vhost_user_connection *conn = dat;
284 struct vhost_user_socket *vsocket = conn->vsocket;
285 int ret;
286
287 ret = vhost_user_msg_handler(conn->vid, connfd);
288 if (ret < 0) {
289 close(connfd);
290 *remove = 1;
291 vhost_destroy_device(conn->vid);
292
293 if (vsocket->notify_ops->destroy_connection)
294 vsocket->notify_ops->destroy_connection(conn->vid);
295
296 pthread_mutex_lock(&vsocket->conn_mutex);
297 TAILQ_REMOVE(&vsocket->conn_list, conn, next);
298 pthread_mutex_unlock(&vsocket->conn_mutex);
299
300 free(conn);
301
302 if (vsocket->reconnect) {
303 create_unix_socket(vsocket);
304 vhost_user_start_client(vsocket);
305 }
306 }
307 }
308
309 static int
310 create_unix_socket(struct vhost_user_socket *vsocket)
311 {
312 int fd;
313 struct sockaddr_un *un = &vsocket->un;
314
315 fd = socket(AF_UNIX, SOCK_STREAM, 0);
316 if (fd < 0)
317 return -1;
318 RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
319 vsocket->is_server ? "server" : "client", fd);
320
321 if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
322 RTE_LOG(ERR, VHOST_CONFIG,
323 "vhost-user: can't set nonblocking mode for socket, fd: "
324 "%d (%s)\n", fd, strerror(errno));
325 close(fd);
326 return -1;
327 }
328
329 memset(un, 0, sizeof(*un));
330 un->sun_family = AF_UNIX;
331 strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
332 un->sun_path[sizeof(un->sun_path) - 1] = '\0';
333
334 vsocket->socket_fd = fd;
335 return 0;
336 }
337
338 static int
339 vhost_user_start_server(struct vhost_user_socket *vsocket)
340 {
341 int ret;
342 int fd = vsocket->socket_fd;
343 const char *path = vsocket->path;
344
345 /*
346 * bind () may fail if the socket file with the same name already
347 * exists. But the library obviously should not delete the file
348 * provided by the user, since we can not be sure that it is not
349 * being used by other applications. Moreover, many applications form
350 * socket names based on user input, which is prone to errors.
351 *
352 * The user must ensure that the socket does not exist before
353 * registering the vhost driver in server mode.
354 */
355 ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
356 if (ret < 0) {
357 RTE_LOG(ERR, VHOST_CONFIG,
358 "failed to bind to %s: %s; remove it and try again\n",
359 path, strerror(errno));
360 goto err;
361 }
362 RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
363
364 ret = listen(fd, MAX_VIRTIO_BACKLOG);
365 if (ret < 0)
366 goto err;
367
368 ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
369 NULL, vsocket);
370 if (ret < 0) {
371 RTE_LOG(ERR, VHOST_CONFIG,
372 "failed to add listen fd %d to vhost server fdset\n",
373 fd);
374 goto err;
375 }
376
377 return 0;
378
379 err:
380 close(fd);
381 return -1;
382 }
383
/* One pending client reconnection attempt, owned by the reconnect thread. */
struct vhost_user_reconnect {
	struct sockaddr_un un;	/* target address to connect to */
	int fd;			/* non-blocking socket being retried */
	struct vhost_user_socket *vsocket;

	TAILQ_ENTRY(vhost_user_reconnect) next;
};

TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
/* List of pending reconnects plus the mutex guarding it. */
struct vhost_user_reconnect_list {
	struct vhost_user_reconnect_tailq_list head;
	pthread_mutex_t mutex;	/* guards head */
};

static struct vhost_user_reconnect_list reconn_list;
/* Lazily created reconnect thread; 0 until vhost_user_reconnect_init(). */
static pthread_t reconn_tid;
400
401 static int
402 vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
403 {
404 int ret, flags;
405
406 ret = connect(fd, un, sz);
407 if (ret < 0 && errno != EISCONN)
408 return -1;
409
410 flags = fcntl(fd, F_GETFL, 0);
411 if (flags < 0) {
412 RTE_LOG(ERR, VHOST_CONFIG,
413 "can't get flags for connfd %d\n", fd);
414 return -2;
415 }
416 if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
417 RTE_LOG(ERR, VHOST_CONFIG,
418 "can't disable nonblocking on fd %d\n", fd);
419 return -2;
420 }
421 return 0;
422 }
423
/*
 * Body of the background reconnect thread: once a second, walk the pending
 * list and retry each connection.  Entries are removed on success or on a
 * fatal (-2) error; a plain connect failure (-1) leaves the entry queued.
 * Runs forever; the thread is never joined.
 */
static void *
vhost_user_client_reconnect(void *arg __rte_unused)
{
	int ret;
	struct vhost_user_reconnect *reconn, *next;

	while (1) {
		pthread_mutex_lock(&reconn_list.mutex);

		/*
		 * An equal implementation of TAILQ_FOREACH_SAFE,
		 * which does not exist on all platforms.
		 */
		for (reconn = TAILQ_FIRST(&reconn_list.head);
		     reconn != NULL; reconn = next) {
			next = TAILQ_NEXT(reconn, next);

			ret = vhost_user_connect_nonblock(reconn->fd,
						(struct sockaddr *)&reconn->un,
						sizeof(reconn->un));
			if (ret == -2) {
				/* fd is broken beyond retry: drop it. */
				close(reconn->fd);
				RTE_LOG(ERR, VHOST_CONFIG,
					"reconnection for fd %d failed\n",
					reconn->fd);
				goto remove_fd;
			}
			if (ret == -1)
				continue; /* not connected yet; retry later */

			RTE_LOG(INFO, VHOST_CONFIG,
				"%s: connected\n", reconn->vsocket->path);
			vhost_user_add_connection(reconn->fd, reconn->vsocket);
remove_fd:
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			free(reconn);
		}

		pthread_mutex_unlock(&reconn_list.mutex);
		sleep(1);
	}

	return NULL;
}
468
469 static int
470 vhost_user_reconnect_init(void)
471 {
472 int ret;
473
474 ret = pthread_mutex_init(&reconn_list.mutex, NULL);
475 if (ret < 0) {
476 RTE_LOG(ERR, VHOST_CONFIG, "failed to initialize mutex");
477 return ret;
478 }
479 TAILQ_INIT(&reconn_list.head);
480
481 ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL,
482 vhost_user_client_reconnect, NULL);
483 if (ret != 0) {
484 RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread");
485 if (pthread_mutex_destroy(&reconn_list.mutex)) {
486 RTE_LOG(ERR, VHOST_CONFIG,
487 "failed to destroy reconnect mutex");
488 }
489 }
490
491 return ret;
492 }
493
494 static int
495 vhost_user_start_client(struct vhost_user_socket *vsocket)
496 {
497 int ret;
498 int fd = vsocket->socket_fd;
499 const char *path = vsocket->path;
500 struct vhost_user_reconnect *reconn;
501
502 ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&vsocket->un,
503 sizeof(vsocket->un));
504 if (ret == 0) {
505 vhost_user_add_connection(fd, vsocket);
506 return 0;
507 }
508
509 RTE_LOG(WARNING, VHOST_CONFIG,
510 "failed to connect to %s: %s\n",
511 path, strerror(errno));
512
513 if (ret == -2 || !vsocket->reconnect) {
514 close(fd);
515 return -1;
516 }
517
518 RTE_LOG(INFO, VHOST_CONFIG, "%s: reconnecting...\n", path);
519 reconn = malloc(sizeof(*reconn));
520 if (reconn == NULL) {
521 RTE_LOG(ERR, VHOST_CONFIG,
522 "failed to allocate memory for reconnect\n");
523 close(fd);
524 return -1;
525 }
526 reconn->un = vsocket->un;
527 reconn->fd = fd;
528 reconn->vsocket = vsocket;
529 pthread_mutex_lock(&reconn_list.mutex);
530 TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
531 pthread_mutex_unlock(&reconn_list.mutex);
532
533 return 0;
534 }
535
536 static struct vhost_user_socket *
537 find_vhost_user_socket(const char *path)
538 {
539 int i;
540
541 for (i = 0; i < vhost_user.vsocket_cnt; i++) {
542 struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
543
544 if (!strcmp(vsocket->path, path))
545 return vsocket;
546 }
547
548 return NULL;
549 }
550
551 int
552 rte_vhost_driver_attach_vdpa_device(const char *path, int did)
553 {
554 struct vhost_user_socket *vsocket;
555
556 if (rte_vdpa_get_device(did) == NULL)
557 return -1;
558
559 pthread_mutex_lock(&vhost_user.mutex);
560 vsocket = find_vhost_user_socket(path);
561 if (vsocket)
562 vsocket->vdpa_dev_id = did;
563 pthread_mutex_unlock(&vhost_user.mutex);
564
565 return vsocket ? 0 : -1;
566 }
567
568 int
569 rte_vhost_driver_detach_vdpa_device(const char *path)
570 {
571 struct vhost_user_socket *vsocket;
572
573 pthread_mutex_lock(&vhost_user.mutex);
574 vsocket = find_vhost_user_socket(path);
575 if (vsocket)
576 vsocket->vdpa_dev_id = -1;
577 pthread_mutex_unlock(&vhost_user.mutex);
578
579 return vsocket ? 0 : -1;
580 }
581
582 int
583 rte_vhost_driver_get_vdpa_device_id(const char *path)
584 {
585 struct vhost_user_socket *vsocket;
586 int did = -1;
587
588 pthread_mutex_lock(&vhost_user.mutex);
589 vsocket = find_vhost_user_socket(path);
590 if (vsocket)
591 did = vsocket->vdpa_dev_id;
592 pthread_mutex_unlock(&vhost_user.mutex);
593
594 return did;
595 }
596
597 int
598 rte_vhost_driver_disable_features(const char *path, uint64_t features)
599 {
600 struct vhost_user_socket *vsocket;
601
602 pthread_mutex_lock(&vhost_user.mutex);
603 vsocket = find_vhost_user_socket(path);
604
605 /* Note that use_builtin_virtio_net is not affected by this function
606 * since callers may want to selectively disable features of the
607 * built-in vhost net device backend.
608 */
609
610 if (vsocket)
611 vsocket->features &= ~features;
612 pthread_mutex_unlock(&vhost_user.mutex);
613
614 return vsocket ? 0 : -1;
615 }
616
617 int
618 rte_vhost_driver_enable_features(const char *path, uint64_t features)
619 {
620 struct vhost_user_socket *vsocket;
621
622 pthread_mutex_lock(&vhost_user.mutex);
623 vsocket = find_vhost_user_socket(path);
624 if (vsocket) {
625 if ((vsocket->supported_features & features) != features) {
626 /*
627 * trying to enable features the driver doesn't
628 * support.
629 */
630 pthread_mutex_unlock(&vhost_user.mutex);
631 return -1;
632 }
633 vsocket->features |= features;
634 }
635 pthread_mutex_unlock(&vhost_user.mutex);
636
637 return vsocket ? 0 : -1;
638 }
639
640 int
641 rte_vhost_driver_set_features(const char *path, uint64_t features)
642 {
643 struct vhost_user_socket *vsocket;
644
645 pthread_mutex_lock(&vhost_user.mutex);
646 vsocket = find_vhost_user_socket(path);
647 if (vsocket) {
648 vsocket->supported_features = features;
649 vsocket->features = features;
650
651 /* Anyone setting feature bits is implementing their own vhost
652 * device backend.
653 */
654 vsocket->use_builtin_virtio_net = false;
655 }
656 pthread_mutex_unlock(&vhost_user.mutex);
657
658 return vsocket ? 0 : -1;
659 }
660
661 int
662 rte_vhost_driver_get_features(const char *path, uint64_t *features)
663 {
664 struct vhost_user_socket *vsocket;
665 uint64_t vdpa_features;
666 struct rte_vdpa_device *vdpa_dev;
667 int did = -1;
668 int ret = 0;
669
670 pthread_mutex_lock(&vhost_user.mutex);
671 vsocket = find_vhost_user_socket(path);
672 if (!vsocket) {
673 RTE_LOG(ERR, VHOST_CONFIG,
674 "socket file %s is not registered yet.\n", path);
675 ret = -1;
676 goto unlock_exit;
677 }
678
679 did = vsocket->vdpa_dev_id;
680 vdpa_dev = rte_vdpa_get_device(did);
681 if (!vdpa_dev || !vdpa_dev->ops->get_features) {
682 *features = vsocket->features;
683 goto unlock_exit;
684 }
685
686 if (vdpa_dev->ops->get_features(did, &vdpa_features) < 0) {
687 RTE_LOG(ERR, VHOST_CONFIG,
688 "failed to get vdpa features "
689 "for socket file %s.\n", path);
690 ret = -1;
691 goto unlock_exit;
692 }
693
694 *features = vsocket->features & vdpa_features;
695
696 unlock_exit:
697 pthread_mutex_unlock(&vhost_user.mutex);
698 return ret;
699 }
700
701 int
702 rte_vhost_driver_get_protocol_features(const char *path,
703 uint64_t *protocol_features)
704 {
705 struct vhost_user_socket *vsocket;
706 uint64_t vdpa_protocol_features;
707 struct rte_vdpa_device *vdpa_dev;
708 int did = -1;
709 int ret = 0;
710
711 pthread_mutex_lock(&vhost_user.mutex);
712 vsocket = find_vhost_user_socket(path);
713 if (!vsocket) {
714 RTE_LOG(ERR, VHOST_CONFIG,
715 "socket file %s is not registered yet.\n", path);
716 ret = -1;
717 goto unlock_exit;
718 }
719
720 did = vsocket->vdpa_dev_id;
721 vdpa_dev = rte_vdpa_get_device(did);
722 if (!vdpa_dev || !vdpa_dev->ops->get_protocol_features) {
723 *protocol_features = VHOST_USER_PROTOCOL_FEATURES;
724 goto unlock_exit;
725 }
726
727 if (vdpa_dev->ops->get_protocol_features(did,
728 &vdpa_protocol_features) < 0) {
729 RTE_LOG(ERR, VHOST_CONFIG,
730 "failed to get vdpa protocol features "
731 "for socket file %s.\n", path);
732 ret = -1;
733 goto unlock_exit;
734 }
735
736 *protocol_features = VHOST_USER_PROTOCOL_FEATURES
737 & vdpa_protocol_features;
738
739 unlock_exit:
740 pthread_mutex_unlock(&vhost_user.mutex);
741 return ret;
742 }
743
744 int
745 rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
746 {
747 struct vhost_user_socket *vsocket;
748 uint32_t vdpa_queue_num;
749 struct rte_vdpa_device *vdpa_dev;
750 int did = -1;
751 int ret = 0;
752
753 pthread_mutex_lock(&vhost_user.mutex);
754 vsocket = find_vhost_user_socket(path);
755 if (!vsocket) {
756 RTE_LOG(ERR, VHOST_CONFIG,
757 "socket file %s is not registered yet.\n", path);
758 ret = -1;
759 goto unlock_exit;
760 }
761
762 did = vsocket->vdpa_dev_id;
763 vdpa_dev = rte_vdpa_get_device(did);
764 if (!vdpa_dev || !vdpa_dev->ops->get_queue_num) {
765 *queue_num = VHOST_MAX_QUEUE_PAIRS;
766 goto unlock_exit;
767 }
768
769 if (vdpa_dev->ops->get_queue_num(did, &vdpa_queue_num) < 0) {
770 RTE_LOG(ERR, VHOST_CONFIG,
771 "failed to get vdpa queue number "
772 "for socket file %s.\n", path);
773 ret = -1;
774 goto unlock_exit;
775 }
776
777 *queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num);
778
779 unlock_exit:
780 pthread_mutex_unlock(&vhost_user.mutex);
781 return ret;
782 }
783
784 static void
785 vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
786 {
787 if (vsocket && vsocket->path) {
788 free(vsocket->path);
789 vsocket->path = NULL;
790 }
791
792 if (vsocket) {
793 free(vsocket);
794 vsocket = NULL;
795 }
796 }
797
/*
 * Register a new vhost-user socket; here we could act as server
 * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
 * is set.
 *
 * Allocates the vhost_user_socket, derives its feature sets from @flags,
 * creates the unix socket fd, and records the socket in the global table.
 * Returns 0 on success, -1 (or a pthread error code) on failure; all
 * partially acquired resources are released via the goto-cleanup chain.
 */
int
rte_vhost_driver_register(const char *path, uint64_t flags)
{
	int ret = -1;
	struct vhost_user_socket *vsocket;

	if (!path)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);

	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: the number of vhost sockets reaches maximum\n");
		goto out;
	}

	vsocket = malloc(sizeof(struct vhost_user_socket));
	if (!vsocket)
		goto out;
	memset(vsocket, 0, sizeof(struct vhost_user_socket));
	vsocket->path = strdup(path);
	if (vsocket->path == NULL) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: failed to copy socket path string\n");
		vhost_user_socket_mem_free(vsocket);
		goto out;
	}
	TAILQ_INIT(&vsocket->conn_list);
	ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
	if (ret) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: failed to init connection mutex\n");
		goto out_free;
	}
	vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY;

	/*
	 * Set the supported features correctly for the builtin vhost-user
	 * net driver.
	 *
	 * Applications know nothing about features the builtin virtio net
	 * driver (virtio_net.c) supports, thus it's not possible for them
	 * to invoke rte_vhost_driver_set_features(). To workaround it, here
	 * we set it unconditionally. If the application want to implement
	 * another vhost-user driver (say SCSI), it should call the
	 * rte_vhost_driver_set_features(), which will overwrite following
	 * two values.
	 */
	vsocket->use_builtin_virtio_net = true;
	vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
	vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES;

	/* Dequeue zero copy can't assure descriptors returned in order */
	if (vsocket->dequeue_zero_copy) {
		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IN_ORDER);
		vsocket->features &= ~(1ULL << VIRTIO_F_IN_ORDER);
	}

	/* Strip IOMMU support unless the application asked for it. */
	if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
		vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
	}

	if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
		vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
		/* Reconnect thread is created lazily on first client socket. */
		if (vsocket->reconnect && reconn_tid == 0) {
			if (vhost_user_reconnect_init() != 0)
				goto out_mutex;
		}
	} else {
		vsocket->is_server = true;
	}
	ret = create_unix_socket(vsocket);
	if (ret < 0) {
		goto out_mutex;
	}

	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;

	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;

out_mutex:
	if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: failed to destroy connection mutex\n");
	}
out_free:
	vhost_user_socket_mem_free(vsocket);
out:
	pthread_mutex_unlock(&vhost_user.mutex);

	return ret;
}
898
899 static bool
900 vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
901 {
902 int found = false;
903 struct vhost_user_reconnect *reconn, *next;
904
905 pthread_mutex_lock(&reconn_list.mutex);
906
907 for (reconn = TAILQ_FIRST(&reconn_list.head);
908 reconn != NULL; reconn = next) {
909 next = TAILQ_NEXT(reconn, next);
910
911 if (reconn->vsocket == vsocket) {
912 TAILQ_REMOVE(&reconn_list.head, reconn, next);
913 close(reconn->fd);
914 free(reconn);
915 found = true;
916 break;
917 }
918 }
919 pthread_mutex_unlock(&reconn_list.mutex);
920 return found;
921 }
922
/**
 * Unregister the specified vhost socket
 *
 * Tears down every live connection on the socket, removes the listen fd
 * (server) or pending reconnect entry (client), frees the socket and
 * compacts the global table.  Returns 0 on success, -1 if @path is not
 * registered.
 */
int
rte_vhost_driver_unregister(const char *path)
{
	int i;
	int count;
	struct vhost_user_connection *conn, *next;

	pthread_mutex_lock(&vhost_user.mutex);

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];

		if (!strcmp(vsocket->path, path)) {
again:
			pthread_mutex_lock(&vsocket->conn_mutex);
			for (conn = TAILQ_FIRST(&vsocket->conn_list);
			     conn != NULL;
			     conn = next) {
				next = TAILQ_NEXT(conn, next);

				/*
				 * If r/wcb is executing, release the
				 * conn_mutex lock, and try again since
				 * the r/wcb may use the conn_mutex lock.
				 */
				if (fdset_try_del(&vhost_user.fdset,
						  conn->connfd) == -1) {
					pthread_mutex_unlock(
							&vsocket->conn_mutex);
					goto again;
				}

				RTE_LOG(INFO, VHOST_CONFIG,
					"free connfd = %d for device '%s'\n",
					conn->connfd, path);
				close(conn->connfd);
				vhost_destroy_device(conn->vid);
				TAILQ_REMOVE(&vsocket->conn_list, conn, next);
				free(conn);
			}
			pthread_mutex_unlock(&vsocket->conn_mutex);

			if (vsocket->is_server) {
				/* Drop listen fd and remove the socket file. */
				fdset_del(&vhost_user.fdset,
						vsocket->socket_fd);
				close(vsocket->socket_fd);
				unlink(path);
			} else if (vsocket->reconnect) {
				/* Cancel any pending background reconnect. */
				vhost_user_remove_reconnect(vsocket);
			}

			pthread_mutex_destroy(&vsocket->conn_mutex);
			vhost_user_socket_mem_free(vsocket);

			/* Compact: move the last slot into the freed one. */
			count = --vhost_user.vsocket_cnt;
			vhost_user.vsockets[i] = vhost_user.vsockets[count];
			vhost_user.vsockets[count] = NULL;
			pthread_mutex_unlock(&vhost_user.mutex);

			return 0;
		}
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return -1;
}
992
993 /*
994 * Register ops so that we can add/remove device to data core.
995 */
996 int
997 rte_vhost_driver_callback_register(const char *path,
998 struct vhost_device_ops const * const ops)
999 {
1000 struct vhost_user_socket *vsocket;
1001
1002 pthread_mutex_lock(&vhost_user.mutex);
1003 vsocket = find_vhost_user_socket(path);
1004 if (vsocket)
1005 vsocket->notify_ops = ops;
1006 pthread_mutex_unlock(&vhost_user.mutex);
1007
1008 return vsocket ? 0 : -1;
1009 }
1010
1011 struct vhost_device_ops const *
1012 vhost_driver_callback_get(const char *path)
1013 {
1014 struct vhost_user_socket *vsocket;
1015
1016 pthread_mutex_lock(&vhost_user.mutex);
1017 vsocket = find_vhost_user_socket(path);
1018 pthread_mutex_unlock(&vhost_user.mutex);
1019
1020 return vsocket ? vsocket->notify_ops : NULL;
1021 }
1022
1023 int
1024 rte_vhost_driver_start(const char *path)
1025 {
1026 struct vhost_user_socket *vsocket;
1027 static pthread_t fdset_tid;
1028
1029 pthread_mutex_lock(&vhost_user.mutex);
1030 vsocket = find_vhost_user_socket(path);
1031 pthread_mutex_unlock(&vhost_user.mutex);
1032
1033 if (!vsocket)
1034 return -1;
1035
1036 if (fdset_tid == 0) {
1037 /**
1038 * create a pipe which will be waited by poll and notified to
1039 * rebuild the wait list of poll.
1040 */
1041 if (fdset_pipe_init(&vhost_user.fdset) < 0) {
1042 RTE_LOG(ERR, VHOST_CONFIG,
1043 "failed to create pipe for vhost fdset\n");
1044 return -1;
1045 }
1046
1047 int ret = rte_ctrl_thread_create(&fdset_tid,
1048 "vhost-events", NULL, fdset_event_dispatch,
1049 &vhost_user.fdset);
1050 if (ret != 0) {
1051 RTE_LOG(ERR, VHOST_CONFIG,
1052 "failed to create fdset handling thread");
1053
1054 fdset_pipe_uninit(&vhost_user.fdset);
1055 return -1;
1056 }
1057 }
1058
1059 if (vsocket->is_server)
1060 return vhost_user_start_server(vsocket);
1061 else
1062 return vhost_user_start_client(vsocket);
1063 }