]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*- |
2 | * BSD LICENSE | |
3 | * | |
4 | * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. | |
5 | * All rights reserved. | |
6 | * | |
7 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions | |
9 | * are met: | |
10 | * | |
11 | * * Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * * Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in | |
15 | * the documentation and/or other materials provided with the | |
16 | * distribution. | |
17 | * * Neither the name of Intel Corporation nor the names of its | |
18 | * contributors may be used to endorse or promote products derived | |
19 | * from this software without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
32 | */ | |
33 | ||
34 | #include <string.h> | |
35 | #include <fcntl.h> | |
36 | #include <sys/socket.h> | |
37 | #include <pthread.h> | |
38 | ||
39 | /* sys/un.h with __USE_MISC uses strlen, which is unsafe */ | |
40 | #ifdef __USE_MISC | |
41 | #define REMOVED_USE_MISC | |
42 | #undef __USE_MISC | |
43 | #endif | |
44 | #include <sys/un.h> | |
45 | /* make sure we redefine __USE_MISC only if it was previously undefined */ | |
46 | #ifdef REMOVED_USE_MISC | |
47 | #define __USE_MISC | |
48 | #undef REMOVED_USE_MISC | |
49 | #endif | |
50 | ||
51 | #include <rte_log.h> | |
52 | #include <rte_pci.h> | |
53 | #include <rte_eal_memconfig.h> | |
54 | #include <rte_malloc.h> | |
55 | ||
56 | #include "eal_filesystem.h" | |
57 | #include "eal_pci_init.h" | |
58 | #include "eal_thread.h" | |
59 | ||
60 | /** | |
61 | * @file | |
62 | * VFIO socket for communication between primary and secondary processes. | |
63 | * | |
64 | * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y". | |
65 | */ | |
66 | ||
67 | #ifdef VFIO_PRESENT | |
68 | ||
/* socket path template: "<directory>/.<hugefile prefix>_mp_socket" */
#define SOCKET_PATH_FMT "%s/.%s_mp_socket"
/* length of a control message carrying exactly one int (file descriptor) */
#define CMSGLEN (CMSG_LEN(sizeof(int)))
/*
 * Fill the control message header `chdr` (a struct cmsghdr lvalue) so that
 * it carries `fd` as an SCM_RIGHTS payload.
 *
 * The payload is reached through the standard CMSG_DATA() accessor rather
 * than the libc-private __cmsg_data member, which not every libc provides.
 */
#define FD_TO_CMSGHDR(fd, chdr) \
	do {\
		(chdr).cmsg_len = CMSGLEN;\
		(chdr).cmsg_level = SOL_SOCKET;\
		(chdr).cmsg_type = SCM_RIGHTS;\
		memcpy(CMSG_DATA(&(chdr)), &(fd), sizeof(fd));\
	} while (0)
/* copy the payload of control message header `chdr` into `fd` */
#define CMSGHDR_TO_FD(chdr, fd) \
	memcpy(&(fd), CMSG_DATA(&(chdr)), sizeof(fd))

/* thread servicing requests on the listening socket */
static pthread_t socket_thread;
/* listening socket of the primary process */
static int mp_socket_fd;
83 | ||
84 | ||
85 | /* get socket path (/var/run if root, $HOME otherwise) */ | |
86 | static void | |
87 | get_socket_path(char *buffer, int bufsz) | |
88 | { | |
89 | const char *dir = "/var/run"; | |
90 | const char *home_dir = getenv("HOME"); | |
91 | ||
92 | if (getuid() != 0 && home_dir != NULL) | |
93 | dir = home_dir; | |
94 | ||
95 | /* use current prefix as file path */ | |
96 | snprintf(buffer, bufsz, SOCKET_PATH_FMT, dir, | |
97 | internal_config.hugefile_prefix); | |
98 | } | |
99 | ||
100 | ||
101 | ||
102 | /* | |
103 | * data flow for socket comm protocol: | |
104 | * 1. client sends SOCKET_REQ_CONTAINER or SOCKET_REQ_GROUP | |
105 | * 1a. in case of SOCKET_REQ_GROUP, client also then sends group number | |
106 | * 2. server receives message | |
107 | * 2a. in case of invalid group, SOCKET_ERR is sent back to client | |
108 | * 2b. in case of unbound group, SOCKET_NO_FD is sent back to client | |
109 | * 2c. in case of valid group, SOCKET_OK is sent and immediately followed by fd | |
110 | * | |
111 | * in case of any error, socket is closed. | |
112 | */ | |
113 | ||
/*
 * Send a single int (request or response code) as one message.
 *
 * Returns 0 when sendmsg() succeeds, -1 when it reports an error.
 */
int
vfio_mp_sync_send_request(int socket, int req)
{
	int payload = req;
	struct iovec vec;
	struct msghdr msg;

	vec.iov_base = (char *) &payload;
	vec.iov_len = sizeof(payload);

	/* no address and no control data: only the one-element iovec is set */
	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &vec;
	msg.msg_iovlen = 1;

	return (sendmsg(socket, &msg, 0) < 0) ? -1 : 0;
}
137 | ||
138 | /* receive a request and return it */ | |
139 | int | |
140 | vfio_mp_sync_receive_request(int socket) | |
141 | { | |
142 | int buf; | |
143 | struct msghdr hdr; | |
144 | struct iovec iov; | |
145 | int ret, req; | |
146 | ||
147 | memset(&hdr, 0, sizeof(hdr)); | |
148 | ||
149 | buf = SOCKET_ERR; | |
150 | ||
151 | hdr.msg_iov = &iov; | |
152 | hdr.msg_iovlen = 1; | |
153 | iov.iov_base = (char *) &buf; | |
154 | iov.iov_len = sizeof(buf); | |
155 | ||
156 | ret = recvmsg(socket, &hdr, 0); | |
157 | if (ret < 0) | |
158 | return -1; | |
159 | ||
160 | req = buf; | |
161 | ||
162 | return req; | |
163 | } | |
164 | ||
165 | /* send OK in message, fd in control message */ | |
166 | int | |
167 | vfio_mp_sync_send_fd(int socket, int fd) | |
168 | { | |
169 | int buf; | |
170 | struct msghdr hdr; | |
171 | struct cmsghdr *chdr; | |
172 | char chdr_buf[CMSGLEN]; | |
173 | struct iovec iov; | |
174 | int ret; | |
175 | ||
176 | chdr = (struct cmsghdr *) chdr_buf; | |
177 | memset(chdr, 0, sizeof(chdr_buf)); | |
178 | memset(&hdr, 0, sizeof(hdr)); | |
179 | ||
180 | hdr.msg_iov = &iov; | |
181 | hdr.msg_iovlen = 1; | |
182 | iov.iov_base = (char *) &buf; | |
183 | iov.iov_len = sizeof(buf); | |
184 | hdr.msg_control = chdr; | |
185 | hdr.msg_controllen = CMSGLEN; | |
186 | ||
187 | buf = SOCKET_OK; | |
188 | FD_TO_CMSGHDR(fd, *chdr); | |
189 | ||
190 | ret = sendmsg(socket, &hdr, 0); | |
191 | if (ret < 0) | |
192 | return -1; | |
193 | return 0; | |
194 | } | |
195 | ||
196 | /* receive OK in message, fd in control message */ | |
197 | int | |
198 | vfio_mp_sync_receive_fd(int socket) | |
199 | { | |
200 | int buf; | |
201 | struct msghdr hdr; | |
202 | struct cmsghdr *chdr; | |
203 | char chdr_buf[CMSGLEN]; | |
204 | struct iovec iov; | |
205 | int ret, req, fd; | |
206 | ||
207 | buf = SOCKET_ERR; | |
208 | ||
209 | chdr = (struct cmsghdr *) chdr_buf; | |
210 | memset(chdr, 0, sizeof(chdr_buf)); | |
211 | memset(&hdr, 0, sizeof(hdr)); | |
212 | ||
213 | hdr.msg_iov = &iov; | |
214 | hdr.msg_iovlen = 1; | |
215 | iov.iov_base = (char *) &buf; | |
216 | iov.iov_len = sizeof(buf); | |
217 | hdr.msg_control = chdr; | |
218 | hdr.msg_controllen = CMSGLEN; | |
219 | ||
220 | ret = recvmsg(socket, &hdr, 0); | |
221 | if (ret < 0) | |
222 | return -1; | |
223 | ||
224 | req = buf; | |
225 | ||
226 | if (req != SOCKET_OK) | |
227 | return -1; | |
228 | ||
229 | CMSGHDR_TO_FD(*chdr, fd); | |
230 | ||
231 | return fd; | |
232 | } | |
233 | ||
234 | /* connect socket_fd in secondary process to the primary process's socket */ | |
235 | int | |
236 | vfio_mp_sync_connect_to_primary(void) | |
237 | { | |
238 | struct sockaddr_un addr; | |
239 | socklen_t sockaddr_len; | |
240 | int socket_fd; | |
241 | ||
242 | /* set up a socket */ | |
243 | socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); | |
244 | if (socket_fd < 0) { | |
245 | RTE_LOG(ERR, EAL, "Failed to create socket!\n"); | |
246 | return -1; | |
247 | } | |
248 | ||
249 | get_socket_path(addr.sun_path, sizeof(addr.sun_path)); | |
250 | addr.sun_family = AF_UNIX; | |
251 | ||
252 | sockaddr_len = sizeof(struct sockaddr_un); | |
253 | ||
254 | if (connect(socket_fd, (struct sockaddr *) &addr, sockaddr_len) == 0) | |
255 | return socket_fd; | |
256 | ||
257 | /* if connect failed */ | |
258 | close(socket_fd); | |
259 | return -1; | |
260 | } | |
261 | ||
262 | ||
263 | ||
264 | /* | |
265 | * socket listening thread for primary process | |
266 | */ | |
267 | static __attribute__((noreturn)) void * | |
268 | vfio_mp_sync_thread(void __rte_unused * arg) | |
269 | { | |
11fdf7f2 | 270 | int ret, fd, vfio_data; |
7c673cae FG |
271 | |
272 | /* wait for requests on the socket */ | |
273 | for (;;) { | |
274 | int conn_sock; | |
275 | struct sockaddr_un addr; | |
276 | socklen_t sockaddr_len = sizeof(addr); | |
277 | ||
278 | /* this is a blocking call */ | |
279 | conn_sock = accept(mp_socket_fd, (struct sockaddr *) &addr, | |
280 | &sockaddr_len); | |
281 | ||
282 | /* just restart on error */ | |
283 | if (conn_sock == -1) | |
284 | continue; | |
285 | ||
286 | /* set socket to linger after close */ | |
287 | struct linger l; | |
288 | l.l_onoff = 1; | |
289 | l.l_linger = 60; | |
290 | ||
291 | if (setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) < 0) | |
292 | RTE_LOG(WARNING, EAL, "Cannot set SO_LINGER option " | |
293 | "on listen socket (%s)\n", strerror(errno)); | |
294 | ||
295 | ret = vfio_mp_sync_receive_request(conn_sock); | |
296 | ||
297 | switch (ret) { | |
298 | case SOCKET_REQ_CONTAINER: | |
299 | fd = vfio_get_container_fd(); | |
300 | if (fd < 0) | |
301 | vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); | |
302 | else | |
303 | vfio_mp_sync_send_fd(conn_sock, fd); | |
11fdf7f2 | 304 | close(fd); |
7c673cae FG |
305 | break; |
306 | case SOCKET_REQ_GROUP: | |
307 | /* wait for group number */ | |
11fdf7f2 TL |
308 | vfio_data = vfio_mp_sync_receive_request(conn_sock); |
309 | if (vfio_data < 0) { | |
7c673cae FG |
310 | close(conn_sock); |
311 | continue; | |
312 | } | |
313 | ||
11fdf7f2 | 314 | fd = vfio_get_group_fd(vfio_data); |
7c673cae FG |
315 | |
316 | if (fd < 0) | |
317 | vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); | |
318 | /* if VFIO group exists but isn't bound to VFIO driver */ | |
319 | else if (fd == 0) | |
320 | vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD); | |
321 | /* if group exists and is bound to VFIO driver */ | |
322 | else { | |
323 | vfio_mp_sync_send_request(conn_sock, SOCKET_OK); | |
324 | vfio_mp_sync_send_fd(conn_sock, fd); | |
325 | } | |
326 | break; | |
11fdf7f2 TL |
327 | case SOCKET_CLR_GROUP: |
328 | /* wait for group fd */ | |
329 | vfio_data = vfio_mp_sync_receive_request(conn_sock); | |
330 | if (vfio_data < 0) { | |
331 | close(conn_sock); | |
332 | continue; | |
333 | } | |
334 | ||
335 | ret = clear_group(vfio_data); | |
336 | ||
337 | if (ret < 0) | |
338 | vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD); | |
339 | else | |
340 | vfio_mp_sync_send_request(conn_sock, SOCKET_OK); | |
341 | break; | |
7c673cae FG |
342 | default: |
343 | vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); | |
344 | break; | |
345 | } | |
346 | close(conn_sock); | |
347 | } | |
348 | } | |
349 | ||
350 | static int | |
351 | vfio_mp_sync_socket_setup(void) | |
352 | { | |
353 | int ret, socket_fd; | |
354 | struct sockaddr_un addr; | |
355 | socklen_t sockaddr_len; | |
356 | ||
357 | /* set up a socket */ | |
358 | socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); | |
359 | if (socket_fd < 0) { | |
360 | RTE_LOG(ERR, EAL, "Failed to create socket!\n"); | |
361 | return -1; | |
362 | } | |
363 | ||
364 | get_socket_path(addr.sun_path, sizeof(addr.sun_path)); | |
365 | addr.sun_family = AF_UNIX; | |
366 | ||
367 | sockaddr_len = sizeof(struct sockaddr_un); | |
368 | ||
369 | unlink(addr.sun_path); | |
370 | ||
371 | ret = bind(socket_fd, (struct sockaddr *) &addr, sockaddr_len); | |
372 | if (ret) { | |
373 | RTE_LOG(ERR, EAL, "Failed to bind socket: %s!\n", strerror(errno)); | |
374 | close(socket_fd); | |
375 | return -1; | |
376 | } | |
377 | ||
378 | ret = listen(socket_fd, 50); | |
379 | if (ret) { | |
380 | RTE_LOG(ERR, EAL, "Failed to listen: %s!\n", strerror(errno)); | |
381 | close(socket_fd); | |
382 | return -1; | |
383 | } | |
384 | ||
385 | /* save the socket in local configuration */ | |
386 | mp_socket_fd = socket_fd; | |
387 | ||
388 | return 0; | |
389 | } | |
390 | ||
391 | /* | |
392 | * set up a local socket and tell it to listen for incoming connections | |
393 | */ | |
394 | int | |
395 | vfio_mp_sync_setup(void) | |
396 | { | |
397 | int ret; | |
398 | char thread_name[RTE_MAX_THREAD_NAME_LEN]; | |
399 | ||
400 | if (vfio_mp_sync_socket_setup() < 0) { | |
401 | RTE_LOG(ERR, EAL, "Failed to set up local socket!\n"); | |
402 | return -1; | |
403 | } | |
404 | ||
405 | ret = pthread_create(&socket_thread, NULL, | |
406 | vfio_mp_sync_thread, NULL); | |
407 | if (ret) { | |
408 | RTE_LOG(ERR, EAL, | |
409 | "Failed to create thread for communication with secondary processes!\n"); | |
410 | close(mp_socket_fd); | |
411 | return -1; | |
412 | } | |
413 | ||
414 | /* Set thread_name for aid in debugging. */ | |
415 | snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "vfio-sync"); | |
416 | ret = rte_thread_setname(socket_thread, thread_name); | |
417 | if (ret) | |
418 | RTE_LOG(DEBUG, EAL, | |
419 | "Failed to set thread name for secondary processes!\n"); | |
420 | ||
421 | return 0; | |
422 | } | |
423 | ||
424 | #endif |