]> git.proxmox.com Git - ceph.git/blame - ceph/src/seastar/dpdk/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
update download target update for octopus release
[ceph.git] / ceph / src / seastar / dpdk / lib / librte_eal / linuxapp / eal / eal_vfio_mp_sync.c
CommitLineData
7c673cae
FG
1/*-
2 * BSD LICENSE
3 *
4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include <string.h>
35#include <fcntl.h>
36#include <sys/socket.h>
37#include <pthread.h>
38
39/* sys/un.h with __USE_MISC uses strlen, which is unsafe */
40#ifdef __USE_MISC
41#define REMOVED_USE_MISC
42#undef __USE_MISC
43#endif
44#include <sys/un.h>
45/* make sure we redefine __USE_MISC only if it was previously undefined */
46#ifdef REMOVED_USE_MISC
47#define __USE_MISC
48#undef REMOVED_USE_MISC
49#endif
50
51#include <rte_log.h>
52#include <rte_pci.h>
53#include <rte_eal_memconfig.h>
54#include <rte_malloc.h>
55
56#include "eal_filesystem.h"
57#include "eal_pci_init.h"
58#include "eal_thread.h"
59
60/**
61 * @file
62 * VFIO socket for communication between primary and secondary processes.
63 *
64 * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y".
65 */
66
67#ifdef VFIO_PRESENT
68
/* printf-style format of the socket path: "<dir>/.<prefix>_mp_socket" */
#define SOCKET_PATH_FMT "%s/.%s_mp_socket"

/* length of a control message that carries exactly one int (one fd) */
#define CMSGLEN (CMSG_LEN(sizeof(int)))

/*
 * Fill the control message header 'chdr' (a struct cmsghdr lvalue) so that
 * it passes descriptor 'fd' as SCM_RIGHTS ancillary data.
 *
 * The payload is reached through CMSG_DATA() rather than by naming the
 * libc-private data member of struct cmsghdr, which is not part of the
 * public socket API.
 */
#define FD_TO_CMSGHDR(fd, chdr) \
	do {\
		(chdr).cmsg_len = CMSGLEN;\
		(chdr).cmsg_level = SOL_SOCKET;\
		(chdr).cmsg_type = SCM_RIGHTS;\
		memcpy(CMSG_DATA(&(chdr)), &(fd), sizeof(fd));\
	} while (0)

/* Copy the descriptor stored in control message header 'chdr' into 'fd'. */
#define CMSGHDR_TO_FD(chdr, fd) \
	memcpy(&(fd), CMSG_DATA(&(chdr)), sizeof(fd))

/* thread created by vfio_mp_sync_setup() to serve the socket */
static pthread_t socket_thread;
/* listening socket, stored by vfio_mp_sync_socket_setup() */
static int mp_socket_fd;
83
84
85/* get socket path (/var/run if root, $HOME otherwise) */
86static void
87get_socket_path(char *buffer, int bufsz)
88{
89 const char *dir = "/var/run";
90 const char *home_dir = getenv("HOME");
91
92 if (getuid() != 0 && home_dir != NULL)
93 dir = home_dir;
94
95 /* use current prefix as file path */
96 snprintf(buffer, bufsz, SOCKET_PATH_FMT, dir,
97 internal_config.hugefile_prefix);
98}
99
100
101
/*
 * data flow for socket comm protocol:
 * 1. client sends SOCKET_REQ_CONTAINER, SOCKET_REQ_GROUP or SOCKET_CLR_GROUP
 * 1a. in case of SOCKET_REQ_GROUP, client also then sends group number
 * 1b. in case of SOCKET_CLR_GROUP, client also then sends the group fd
 * 2. server receives message
 * 2a. in case of invalid group, SOCKET_ERR is sent back to client
 * 2b. in case of unbound group, SOCKET_NO_FD is sent back to client
 * 2c. in case of valid group, SOCKET_OK is sent and immediately followed by fd
 * 2d. in case of SOCKET_CLR_GROUP, SOCKET_NO_FD is sent back if clear_group()
 *     returned a negative value, SOCKET_OK otherwise
 *
 * in case of any error, socket is closed.
 */
113
/*
 * Send one integer request/reply code to the peer on 'socket'.
 *
 * The value is sent as the only payload of a single message, with no
 * control data attached.
 *
 * Returns 0 on success, -1 if sendmsg() reports an error.
 */
int
vfio_mp_sync_send_request(int socket, int req)
{
	int payload = req;
	struct iovec vec;
	struct msghdr msg;

	vec.iov_base = (char *) &payload;
	vec.iov_len = sizeof(payload);

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &vec;
	msg.msg_iovlen = 1;

	return (sendmsg(socket, &msg, 0) < 0) ? -1 : 0;
}
137
138/* receive a request and return it */
139int
140vfio_mp_sync_receive_request(int socket)
141{
142 int buf;
143 struct msghdr hdr;
144 struct iovec iov;
145 int ret, req;
146
147 memset(&hdr, 0, sizeof(hdr));
148
149 buf = SOCKET_ERR;
150
151 hdr.msg_iov = &iov;
152 hdr.msg_iovlen = 1;
153 iov.iov_base = (char *) &buf;
154 iov.iov_len = sizeof(buf);
155
156 ret = recvmsg(socket, &hdr, 0);
157 if (ret < 0)
158 return -1;
159
160 req = buf;
161
162 return req;
163}
164
165/* send OK in message, fd in control message */
166int
167vfio_mp_sync_send_fd(int socket, int fd)
168{
169 int buf;
170 struct msghdr hdr;
171 struct cmsghdr *chdr;
172 char chdr_buf[CMSGLEN];
173 struct iovec iov;
174 int ret;
175
176 chdr = (struct cmsghdr *) chdr_buf;
177 memset(chdr, 0, sizeof(chdr_buf));
178 memset(&hdr, 0, sizeof(hdr));
179
180 hdr.msg_iov = &iov;
181 hdr.msg_iovlen = 1;
182 iov.iov_base = (char *) &buf;
183 iov.iov_len = sizeof(buf);
184 hdr.msg_control = chdr;
185 hdr.msg_controllen = CMSGLEN;
186
187 buf = SOCKET_OK;
188 FD_TO_CMSGHDR(fd, *chdr);
189
190 ret = sendmsg(socket, &hdr, 0);
191 if (ret < 0)
192 return -1;
193 return 0;
194}
195
196/* receive OK in message, fd in control message */
197int
198vfio_mp_sync_receive_fd(int socket)
199{
200 int buf;
201 struct msghdr hdr;
202 struct cmsghdr *chdr;
203 char chdr_buf[CMSGLEN];
204 struct iovec iov;
205 int ret, req, fd;
206
207 buf = SOCKET_ERR;
208
209 chdr = (struct cmsghdr *) chdr_buf;
210 memset(chdr, 0, sizeof(chdr_buf));
211 memset(&hdr, 0, sizeof(hdr));
212
213 hdr.msg_iov = &iov;
214 hdr.msg_iovlen = 1;
215 iov.iov_base = (char *) &buf;
216 iov.iov_len = sizeof(buf);
217 hdr.msg_control = chdr;
218 hdr.msg_controllen = CMSGLEN;
219
220 ret = recvmsg(socket, &hdr, 0);
221 if (ret < 0)
222 return -1;
223
224 req = buf;
225
226 if (req != SOCKET_OK)
227 return -1;
228
229 CMSGHDR_TO_FD(*chdr, fd);
230
231 return fd;
232}
233
234/* connect socket_fd in secondary process to the primary process's socket */
235int
236vfio_mp_sync_connect_to_primary(void)
237{
238 struct sockaddr_un addr;
239 socklen_t sockaddr_len;
240 int socket_fd;
241
242 /* set up a socket */
243 socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
244 if (socket_fd < 0) {
245 RTE_LOG(ERR, EAL, "Failed to create socket!\n");
246 return -1;
247 }
248
249 get_socket_path(addr.sun_path, sizeof(addr.sun_path));
250 addr.sun_family = AF_UNIX;
251
252 sockaddr_len = sizeof(struct sockaddr_un);
253
254 if (connect(socket_fd, (struct sockaddr *) &addr, sockaddr_len) == 0)
255 return socket_fd;
256
257 /* if connect failed */
258 close(socket_fd);
259 return -1;
260}
261
262
263
264/*
265 * socket listening thread for primary process
266 */
267static __attribute__((noreturn)) void *
268vfio_mp_sync_thread(void __rte_unused * arg)
269{
11fdf7f2 270 int ret, fd, vfio_data;
7c673cae
FG
271
272 /* wait for requests on the socket */
273 for (;;) {
274 int conn_sock;
275 struct sockaddr_un addr;
276 socklen_t sockaddr_len = sizeof(addr);
277
278 /* this is a blocking call */
279 conn_sock = accept(mp_socket_fd, (struct sockaddr *) &addr,
280 &sockaddr_len);
281
282 /* just restart on error */
283 if (conn_sock == -1)
284 continue;
285
286 /* set socket to linger after close */
287 struct linger l;
288 l.l_onoff = 1;
289 l.l_linger = 60;
290
291 if (setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) < 0)
292 RTE_LOG(WARNING, EAL, "Cannot set SO_LINGER option "
293 "on listen socket (%s)\n", strerror(errno));
294
295 ret = vfio_mp_sync_receive_request(conn_sock);
296
297 switch (ret) {
298 case SOCKET_REQ_CONTAINER:
299 fd = vfio_get_container_fd();
300 if (fd < 0)
301 vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
302 else
303 vfio_mp_sync_send_fd(conn_sock, fd);
11fdf7f2 304 close(fd);
7c673cae
FG
305 break;
306 case SOCKET_REQ_GROUP:
307 /* wait for group number */
11fdf7f2
TL
308 vfio_data = vfio_mp_sync_receive_request(conn_sock);
309 if (vfio_data < 0) {
7c673cae
FG
310 close(conn_sock);
311 continue;
312 }
313
11fdf7f2 314 fd = vfio_get_group_fd(vfio_data);
7c673cae
FG
315
316 if (fd < 0)
317 vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
318 /* if VFIO group exists but isn't bound to VFIO driver */
319 else if (fd == 0)
320 vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD);
321 /* if group exists and is bound to VFIO driver */
322 else {
323 vfio_mp_sync_send_request(conn_sock, SOCKET_OK);
324 vfio_mp_sync_send_fd(conn_sock, fd);
325 }
326 break;
11fdf7f2
TL
327 case SOCKET_CLR_GROUP:
328 /* wait for group fd */
329 vfio_data = vfio_mp_sync_receive_request(conn_sock);
330 if (vfio_data < 0) {
331 close(conn_sock);
332 continue;
333 }
334
335 ret = clear_group(vfio_data);
336
337 if (ret < 0)
338 vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD);
339 else
340 vfio_mp_sync_send_request(conn_sock, SOCKET_OK);
341 break;
7c673cae
FG
342 default:
343 vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
344 break;
345 }
346 close(conn_sock);
347 }
348}
349
350static int
351vfio_mp_sync_socket_setup(void)
352{
353 int ret, socket_fd;
354 struct sockaddr_un addr;
355 socklen_t sockaddr_len;
356
357 /* set up a socket */
358 socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
359 if (socket_fd < 0) {
360 RTE_LOG(ERR, EAL, "Failed to create socket!\n");
361 return -1;
362 }
363
364 get_socket_path(addr.sun_path, sizeof(addr.sun_path));
365 addr.sun_family = AF_UNIX;
366
367 sockaddr_len = sizeof(struct sockaddr_un);
368
369 unlink(addr.sun_path);
370
371 ret = bind(socket_fd, (struct sockaddr *) &addr, sockaddr_len);
372 if (ret) {
373 RTE_LOG(ERR, EAL, "Failed to bind socket: %s!\n", strerror(errno));
374 close(socket_fd);
375 return -1;
376 }
377
378 ret = listen(socket_fd, 50);
379 if (ret) {
380 RTE_LOG(ERR, EAL, "Failed to listen: %s!\n", strerror(errno));
381 close(socket_fd);
382 return -1;
383 }
384
385 /* save the socket in local configuration */
386 mp_socket_fd = socket_fd;
387
388 return 0;
389}
390
391/*
392 * set up a local socket and tell it to listen for incoming connections
393 */
394int
395vfio_mp_sync_setup(void)
396{
397 int ret;
398 char thread_name[RTE_MAX_THREAD_NAME_LEN];
399
400 if (vfio_mp_sync_socket_setup() < 0) {
401 RTE_LOG(ERR, EAL, "Failed to set up local socket!\n");
402 return -1;
403 }
404
405 ret = pthread_create(&socket_thread, NULL,
406 vfio_mp_sync_thread, NULL);
407 if (ret) {
408 RTE_LOG(ERR, EAL,
409 "Failed to create thread for communication with secondary processes!\n");
410 close(mp_socket_fd);
411 return -1;
412 }
413
414 /* Set thread_name for aid in debugging. */
415 snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "vfio-sync");
416 ret = rte_thread_setname(socket_thread, thread_name);
417 if (ret)
418 RTE_LOG(DEBUG, EAL,
419 "Failed to set thread name for secondary processes!\n");
420
421 return 0;
422}
423
424#endif