]>
Commit | Line | Data |
---|---|---|
a75eb03b DM |
1 | /* |
2 | * Copyright 6WIND S.A., 2014 | |
3 | * | |
4 | * This work is licensed under the terms of the GNU GPL, version 2 or | |
5 | * (at your option) any later version. See the COPYING file in the | |
6 | * top-level directory. | |
7 | */ | |
ccd241b5 | 8 | #include "qemu/osdep.h" |
a75eb03b DM |
9 | #include "qemu-common.h" |
10 | #include "qemu/sockets.h" | |
11 | ||
12 | #include <sys/mman.h> | |
a75eb03b DM |
13 | #include <sys/socket.h> |
14 | #include <sys/un.h> | |
1e21feb6 MAL |
15 | #ifdef CONFIG_LINUX |
16 | #include <sys/vfs.h> | |
17 | #endif | |
a75eb03b DM |
18 | |
19 | #include "ivshmem-server.h" | |
20 | ||
/* Print a debug message on stdout, but only when the server was
 * created with verbose=1. */
#define IVSHMEM_SERVER_DEBUG(server, fmt, ...) do {     \
        if ((server)->verbose) {                        \
            printf(fmt, ## __VA_ARGS__);                \
        }                                               \
    } while (0)

/** Largest huge page size probed by ivshmem_server_ftruncate() */
#define IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE (1024 * 1024 * 1024)

/** Default listen backlog (number of pending, not yet accepted sockets) */
#define IVSHMEM_SERVER_LISTEN_BACKLOG 10
33 | ||
/* Send one message on a client unix socket: the payload is a
 * little-endian peer id, optionally carrying a file descriptor as
 * SCM_RIGHTS ancillary data.  Returns 0 on success, -1 on failure. */
static int
ivshmem_server_send_one_msg(int sock_fd, int64_t peer_id, int fd)
{
    struct msghdr mh;
    struct iovec vec;
    union {
        struct cmsghdr hdr;
        char control[CMSG_SPACE(sizeof(int))];
    } ctrl;
    struct cmsghdr *ch;

    /* the payload is always the peer id, converted to little endian */
    peer_id = GINT64_TO_LE(peer_id);
    vec.iov_base = &peer_id;
    vec.iov_len = sizeof(peer_id);

    memset(&mh, 0, sizeof(mh));
    mh.msg_iov = &vec;
    mh.msg_iovlen = 1;

    /* attach the fd in a control message when one is given */
    if (fd >= 0) {
        memset(&ctrl, 0, sizeof(ctrl));
        mh.msg_control = &ctrl;
        mh.msg_controllen = sizeof(ctrl);
        ch = CMSG_FIRSTHDR(&mh);
        ch->cmsg_level = SOL_SOCKET;
        ch->cmsg_type = SCM_RIGHTS;
        ch->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(ch), &fd, sizeof(fd));
    }

    return sendmsg(sock_fd, &mh, 0) <= 0 ? -1 : 0;
}
74 | ||
75 | /* free a peer when the server advertises a disconnection or when the | |
76 | * server is freed */ | |
77 | static void | |
78 | ivshmem_server_free_peer(IvshmemServer *server, IvshmemServerPeer *peer) | |
79 | { | |
80 | unsigned vector; | |
81 | IvshmemServerPeer *other_peer; | |
82 | ||
f7a199b2 | 83 | IVSHMEM_SERVER_DEBUG(server, "free peer %" PRId64 "\n", peer->id); |
a75eb03b DM |
84 | close(peer->sock_fd); |
85 | QTAILQ_REMOVE(&server->peer_list, peer, next); | |
86 | ||
87 | /* advertise the deletion to other peers */ | |
88 | QTAILQ_FOREACH(other_peer, &server->peer_list, next) { | |
89 | ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id, -1); | |
90 | } | |
91 | ||
92 | for (vector = 0; vector < peer->vectors_count; vector++) { | |
93 | event_notifier_cleanup(&peer->vectors[vector]); | |
94 | } | |
95 | ||
96 | g_free(peer); | |
97 | } | |
98 | ||
99 | /* send the peer id and the shm_fd just after a new client connection */ | |
100 | static int | |
101 | ivshmem_server_send_initial_info(IvshmemServer *server, IvshmemServerPeer *peer) | |
102 | { | |
103 | int ret; | |
104 | ||
5105b1d8 DM |
105 | /* send our protocol version first */ |
106 | ret = ivshmem_server_send_one_msg(peer->sock_fd, IVSHMEM_PROTOCOL_VERSION, | |
107 | -1); | |
108 | if (ret < 0) { | |
109 | IVSHMEM_SERVER_DEBUG(server, "cannot send version: %s\n", | |
110 | strerror(errno)); | |
111 | return -1; | |
112 | } | |
113 | ||
a75eb03b DM |
114 | /* send the peer id to the client */ |
115 | ret = ivshmem_server_send_one_msg(peer->sock_fd, peer->id, -1); | |
116 | if (ret < 0) { | |
117 | IVSHMEM_SERVER_DEBUG(server, "cannot send peer id: %s\n", | |
118 | strerror(errno)); | |
119 | return -1; | |
120 | } | |
121 | ||
122 | /* send the shm_fd */ | |
123 | ret = ivshmem_server_send_one_msg(peer->sock_fd, -1, server->shm_fd); | |
124 | if (ret < 0) { | |
125 | IVSHMEM_SERVER_DEBUG(server, "cannot send shm fd: %s\n", | |
126 | strerror(errno)); | |
127 | return -1; | |
128 | } | |
129 | ||
130 | return 0; | |
131 | } | |
132 | ||
133 | /* handle message on listening unix socket (new client connection) */ | |
134 | static int | |
135 | ivshmem_server_handle_new_conn(IvshmemServer *server) | |
136 | { | |
137 | IvshmemServerPeer *peer, *other_peer; | |
138 | struct sockaddr_un unaddr; | |
139 | socklen_t unaddr_len; | |
140 | int newfd; | |
141 | unsigned i; | |
142 | ||
143 | /* accept the incoming connection */ | |
144 | unaddr_len = sizeof(unaddr); | |
145 | newfd = qemu_accept(server->sock_fd, | |
146 | (struct sockaddr *)&unaddr, &unaddr_len); | |
147 | ||
148 | if (newfd < 0) { | |
149 | IVSHMEM_SERVER_DEBUG(server, "cannot accept() %s\n", strerror(errno)); | |
150 | return -1; | |
151 | } | |
152 | ||
153 | qemu_set_nonblock(newfd); | |
154 | IVSHMEM_SERVER_DEBUG(server, "accept()=%d\n", newfd); | |
155 | ||
156 | /* allocate new structure for this peer */ | |
157 | peer = g_malloc0(sizeof(*peer)); | |
158 | peer->sock_fd = newfd; | |
159 | ||
160 | /* get an unused peer id */ | |
022cffe3 MAL |
161 | /* XXX: this could use id allocation such as Linux IDA, or simply |
162 | * a free-list */ | |
163 | for (i = 0; i < G_MAXUINT16; i++) { | |
164 | if (ivshmem_server_search_peer(server, server->cur_id) == NULL) { | |
165 | break; | |
166 | } | |
a75eb03b DM |
167 | server->cur_id++; |
168 | } | |
022cffe3 MAL |
169 | if (i == G_MAXUINT16) { |
170 | IVSHMEM_SERVER_DEBUG(server, "cannot allocate new client id\n"); | |
258133bd GA |
171 | close(newfd); |
172 | g_free(peer); | |
173 | return -1; | |
022cffe3 | 174 | } |
a75eb03b DM |
175 | peer->id = server->cur_id++; |
176 | ||
177 | /* create eventfd, one per vector */ | |
178 | peer->vectors_count = server->n_vectors; | |
179 | for (i = 0; i < peer->vectors_count; i++) { | |
180 | if (event_notifier_init(&peer->vectors[i], FALSE) < 0) { | |
181 | IVSHMEM_SERVER_DEBUG(server, "cannot create eventfd\n"); | |
182 | goto fail; | |
183 | } | |
184 | } | |
185 | ||
186 | /* send peer id and shm fd */ | |
187 | if (ivshmem_server_send_initial_info(server, peer) < 0) { | |
188 | IVSHMEM_SERVER_DEBUG(server, "cannot send initial info\n"); | |
189 | goto fail; | |
190 | } | |
191 | ||
192 | /* advertise the new peer to others */ | |
193 | QTAILQ_FOREACH(other_peer, &server->peer_list, next) { | |
194 | for (i = 0; i < peer->vectors_count; i++) { | |
195 | ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id, | |
196 | peer->vectors[i].wfd); | |
197 | } | |
198 | } | |
199 | ||
200 | /* advertise the other peers to the new one */ | |
201 | QTAILQ_FOREACH(other_peer, &server->peer_list, next) { | |
202 | for (i = 0; i < peer->vectors_count; i++) { | |
203 | ivshmem_server_send_one_msg(peer->sock_fd, other_peer->id, | |
204 | other_peer->vectors[i].wfd); | |
205 | } | |
206 | } | |
207 | ||
208 | /* advertise the new peer to itself */ | |
209 | for (i = 0; i < peer->vectors_count; i++) { | |
210 | ivshmem_server_send_one_msg(peer->sock_fd, peer->id, | |
211 | event_notifier_get_fd(&peer->vectors[i])); | |
212 | } | |
213 | ||
214 | QTAILQ_INSERT_TAIL(&server->peer_list, peer, next); | |
f7a199b2 | 215 | IVSHMEM_SERVER_DEBUG(server, "new peer id = %" PRId64 "\n", |
a75eb03b DM |
216 | peer->id); |
217 | return 0; | |
218 | ||
219 | fail: | |
220 | while (i--) { | |
221 | event_notifier_cleanup(&peer->vectors[i]); | |
222 | } | |
223 | close(newfd); | |
224 | g_free(peer); | |
225 | return -1; | |
226 | } | |
227 | ||
228 | /* Try to ftruncate a file to next power of 2 of shmsize. | |
229 | * If it fails; all power of 2 above shmsize are tested until | |
230 | * we reach the maximum huge page size. This is useful | |
231 | * if the shm file is in a hugetlbfs that cannot be truncated to the | |
232 | * shm_size value. */ | |
233 | static int | |
234 | ivshmem_server_ftruncate(int fd, unsigned shmsize) | |
235 | { | |
236 | int ret; | |
237 | struct stat mapstat; | |
238 | ||
239 | /* align shmsize to next power of 2 */ | |
240 | shmsize = pow2ceil(shmsize); | |
241 | ||
242 | if (fstat(fd, &mapstat) != -1 && mapstat.st_size == shmsize) { | |
243 | return 0; | |
244 | } | |
245 | ||
246 | while (shmsize <= IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE) { | |
247 | ret = ftruncate(fd, shmsize); | |
248 | if (ret == 0) { | |
249 | return ret; | |
250 | } | |
251 | shmsize *= 2; | |
252 | } | |
253 | ||
254 | return -1; | |
255 | } | |
256 | ||
257 | /* Init a new ivshmem server */ | |
258 | int | |
259 | ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path, | |
260 | const char *shm_path, size_t shm_size, unsigned n_vectors, | |
261 | bool verbose) | |
262 | { | |
263 | int ret; | |
264 | ||
265 | memset(server, 0, sizeof(*server)); | |
266 | server->verbose = verbose; | |
267 | ||
268 | ret = snprintf(server->unix_sock_path, sizeof(server->unix_sock_path), | |
269 | "%s", unix_sock_path); | |
270 | if (ret < 0 || ret >= sizeof(server->unix_sock_path)) { | |
271 | IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n"); | |
272 | return -1; | |
273 | } | |
274 | ret = snprintf(server->shm_path, sizeof(server->shm_path), | |
275 | "%s", shm_path); | |
276 | if (ret < 0 || ret >= sizeof(server->shm_path)) { | |
277 | IVSHMEM_SERVER_DEBUG(server, "could not copy shm path\n"); | |
278 | return -1; | |
279 | } | |
280 | ||
281 | server->shm_size = shm_size; | |
282 | server->n_vectors = n_vectors; | |
283 | ||
284 | QTAILQ_INIT(&server->peer_list); | |
285 | ||
286 | return 0; | |
287 | } | |
288 | ||
1e21feb6 MAL |
289 | #ifdef CONFIG_LINUX |
290 | ||
#define HUGETLBFS_MAGIC 0x958458f6

/* Return the block size (i.e. the huge page size) of the filesystem
 * holding 'path' if it is a hugetlbfs mount; -1 otherwise, or when
 * statfs() fails (errno is left set by statfs in that case). */
static long gethugepagesize(const char *path)
{
    struct statfs fs;
    int rc;

    /* retry statfs() when interrupted by a signal */
    do {
        rc = statfs(path, &fs);
    } while (rc != 0 && errno == EINTR);

    if (rc != 0 || fs.f_type != HUGETLBFS_MAGIC) {
        return -1;
    }

    return fs.f_bsize;
}
312 | #endif | |
313 | ||
a75eb03b DM |
314 | /* open shm, create and bind to the unix socket */ |
315 | int | |
316 | ivshmem_server_start(IvshmemServer *server) | |
317 | { | |
318 | struct sockaddr_un sun; | |
319 | int shm_fd, sock_fd, ret; | |
320 | ||
321 | /* open shm file */ | |
1e21feb6 MAL |
322 | #ifdef CONFIG_LINUX |
323 | long hpagesize; | |
324 | ||
325 | hpagesize = gethugepagesize(server->shm_path); | |
326 | if (hpagesize < 0 && errno != ENOENT) { | |
327 | IVSHMEM_SERVER_DEBUG(server, "cannot stat shm file %s: %s\n", | |
328 | server->shm_path, strerror(errno)); | |
329 | } | |
330 | ||
331 | if (hpagesize > 0) { | |
332 | gchar *filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path); | |
333 | IVSHMEM_SERVER_DEBUG(server, "Using hugepages: %s\n", server->shm_path); | |
334 | shm_fd = mkstemp(filename); | |
335 | unlink(filename); | |
336 | g_free(filename); | |
337 | } else | |
338 | #endif | |
339 | { | |
340 | IVSHMEM_SERVER_DEBUG(server, "Using POSIX shared memory: %s\n", | |
341 | server->shm_path); | |
342 | shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU); | |
343 | } | |
344 | ||
a75eb03b DM |
345 | if (shm_fd < 0) { |
346 | fprintf(stderr, "cannot open shm file %s: %s\n", server->shm_path, | |
347 | strerror(errno)); | |
348 | return -1; | |
349 | } | |
350 | if (ivshmem_server_ftruncate(shm_fd, server->shm_size) < 0) { | |
351 | fprintf(stderr, "ftruncate(%s) failed: %s\n", server->shm_path, | |
352 | strerror(errno)); | |
353 | goto err_close_shm; | |
354 | } | |
355 | ||
356 | IVSHMEM_SERVER_DEBUG(server, "create & bind socket %s\n", | |
357 | server->unix_sock_path); | |
358 | ||
359 | /* create the unix listening socket */ | |
360 | sock_fd = socket(AF_UNIX, SOCK_STREAM, 0); | |
361 | if (sock_fd < 0) { | |
362 | IVSHMEM_SERVER_DEBUG(server, "cannot create socket: %s\n", | |
363 | strerror(errno)); | |
364 | goto err_close_shm; | |
365 | } | |
366 | ||
367 | sun.sun_family = AF_UNIX; | |
368 | ret = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s", | |
369 | server->unix_sock_path); | |
370 | if (ret < 0 || ret >= sizeof(sun.sun_path)) { | |
371 | IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n"); | |
372 | goto err_close_sock; | |
373 | } | |
374 | if (bind(sock_fd, (struct sockaddr *)&sun, sizeof(sun)) < 0) { | |
375 | IVSHMEM_SERVER_DEBUG(server, "cannot connect to %s: %s\n", sun.sun_path, | |
376 | strerror(errno)); | |
377 | goto err_close_sock; | |
378 | } | |
379 | ||
380 | if (listen(sock_fd, IVSHMEM_SERVER_LISTEN_BACKLOG) < 0) { | |
381 | IVSHMEM_SERVER_DEBUG(server, "listen() failed: %s\n", strerror(errno)); | |
382 | goto err_close_sock; | |
383 | } | |
384 | ||
385 | server->sock_fd = sock_fd; | |
386 | server->shm_fd = shm_fd; | |
387 | ||
388 | return 0; | |
389 | ||
390 | err_close_sock: | |
391 | close(sock_fd); | |
392 | err_close_shm: | |
393 | close(shm_fd); | |
394 | return -1; | |
395 | } | |
396 | ||
397 | /* close connections to clients, the unix socket and the shm fd */ | |
398 | void | |
399 | ivshmem_server_close(IvshmemServer *server) | |
400 | { | |
401 | IvshmemServerPeer *peer, *npeer; | |
402 | ||
403 | IVSHMEM_SERVER_DEBUG(server, "close server\n"); | |
404 | ||
405 | QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, npeer) { | |
406 | ivshmem_server_free_peer(server, peer); | |
407 | } | |
408 | ||
409 | unlink(server->unix_sock_path); | |
410 | close(server->sock_fd); | |
411 | close(server->shm_fd); | |
412 | server->sock_fd = -1; | |
413 | server->shm_fd = -1; | |
414 | } | |
415 | ||
416 | /* get the fd_set according to the unix socket and the peer list */ | |
417 | void | |
418 | ivshmem_server_get_fds(const IvshmemServer *server, fd_set *fds, int *maxfd) | |
419 | { | |
420 | IvshmemServerPeer *peer; | |
421 | ||
422 | if (server->sock_fd == -1) { | |
423 | return; | |
424 | } | |
425 | ||
426 | FD_SET(server->sock_fd, fds); | |
427 | if (server->sock_fd >= *maxfd) { | |
428 | *maxfd = server->sock_fd + 1; | |
429 | } | |
430 | ||
431 | QTAILQ_FOREACH(peer, &server->peer_list, next) { | |
432 | FD_SET(peer->sock_fd, fds); | |
433 | if (peer->sock_fd >= *maxfd) { | |
434 | *maxfd = peer->sock_fd + 1; | |
435 | } | |
436 | } | |
437 | } | |
438 | ||
439 | /* process incoming messages on the sockets in fd_set */ | |
440 | int | |
441 | ivshmem_server_handle_fds(IvshmemServer *server, fd_set *fds, int maxfd) | |
442 | { | |
443 | IvshmemServerPeer *peer, *peer_next; | |
444 | ||
445 | if (server->sock_fd < maxfd && FD_ISSET(server->sock_fd, fds) && | |
446 | ivshmem_server_handle_new_conn(server) < 0 && errno != EINTR) { | |
447 | IVSHMEM_SERVER_DEBUG(server, "ivshmem_server_handle_new_conn() " | |
448 | "failed\n"); | |
449 | return -1; | |
450 | } | |
451 | ||
452 | QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, peer_next) { | |
453 | /* any message from a peer socket result in a close() */ | |
454 | IVSHMEM_SERVER_DEBUG(server, "peer->sock_fd=%d\n", peer->sock_fd); | |
455 | if (peer->sock_fd < maxfd && FD_ISSET(peer->sock_fd, fds)) { | |
456 | ivshmem_server_free_peer(server, peer); | |
457 | } | |
458 | } | |
459 | ||
460 | return 0; | |
461 | } | |
462 | ||
463 | /* lookup peer from its id */ | |
464 | IvshmemServerPeer * | |
f7a199b2 | 465 | ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id) |
a75eb03b DM |
466 | { |
467 | IvshmemServerPeer *peer; | |
468 | ||
469 | QTAILQ_FOREACH(peer, &server->peer_list, next) { | |
470 | if (peer->id == peer_id) { | |
471 | return peer; | |
472 | } | |
473 | } | |
474 | return NULL; | |
475 | } | |
476 | ||
477 | /* dump our info, the list of peers their vectors on stdout */ | |
478 | void | |
479 | ivshmem_server_dump(const IvshmemServer *server) | |
480 | { | |
481 | const IvshmemServerPeer *peer; | |
482 | unsigned vector; | |
483 | ||
484 | /* dump peers */ | |
485 | QTAILQ_FOREACH(peer, &server->peer_list, next) { | |
f7a199b2 | 486 | printf("peer_id = %" PRId64 "\n", peer->id); |
a75eb03b DM |
487 | |
488 | for (vector = 0; vector < peer->vectors_count; vector++) { | |
489 | printf(" vector %d is enabled (fd=%d)\n", vector, | |
490 | event_notifier_get_fd(&peer->vectors[vector])); | |
491 | } | |
492 | } | |
493 | } |