]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | /* SPDX-License-Identifier: BSD-3-Clause |
2 | * Copyright(c) 2010-2016 Intel Corporation | |
7c673cae FG |
3 | */ |
4 | ||
5 | #include <sys/socket.h> | |
6 | #include <sys/types.h> | |
7 | #include <sys/stat.h> | |
8 | #include <unistd.h> | |
9 | #include <fcntl.h> | |
10 | #include <sys/un.h> | |
11 | #include <string.h> | |
12 | #include <errno.h> | |
13 | ||
9f95a23c TL |
14 | #include <rte_string_fns.h> |
15 | #include <rte_fbarray.h> | |
16 | #include <rte_eal_memconfig.h> | |
17 | ||
7c673cae | 18 | #include "vhost.h" |
11fdf7f2 TL |
19 | #include "virtio_user_dev.h" |
20 | ||
21 | /* The version of the protocol we support */ | |
22 | #define VHOST_USER_VERSION 0x1 | |
23 | ||
24 | #define VHOST_MEMORY_MAX_NREGIONS 8 | |
25 | struct vhost_memory { | |
26 | uint32_t nregions; | |
27 | uint32_t padding; | |
28 | struct vhost_memory_region regions[VHOST_MEMORY_MAX_NREGIONS]; | |
29 | }; | |
30 | ||
31 | struct vhost_user_msg { | |
32 | enum vhost_user_request request; | |
33 | ||
34 | #define VHOST_USER_VERSION_MASK 0x3 | |
35 | #define VHOST_USER_REPLY_MASK (0x1 << 2) | |
36 | uint32_t flags; | |
37 | uint32_t size; /* the following payload size */ | |
38 | union { | |
39 | #define VHOST_USER_VRING_IDX_MASK 0xff | |
40 | #define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) | |
41 | uint64_t u64; | |
42 | struct vhost_vring_state state; | |
43 | struct vhost_vring_addr addr; | |
44 | struct vhost_memory memory; | |
45 | } payload; | |
46 | int fds[VHOST_MEMORY_MAX_NREGIONS]; | |
47 | } __attribute((packed)); | |
48 | ||
49 | #define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64) | |
50 | #define VHOST_USER_PAYLOAD_SIZE \ | |
51 | (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE) | |
7c673cae FG |
52 | |
53 | static int | |
54 | vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num) | |
55 | { | |
56 | int r; | |
57 | struct msghdr msgh; | |
58 | struct iovec iov; | |
59 | size_t fd_size = fd_num * sizeof(int); | |
60 | char control[CMSG_SPACE(fd_size)]; | |
61 | struct cmsghdr *cmsg; | |
62 | ||
63 | memset(&msgh, 0, sizeof(msgh)); | |
64 | memset(control, 0, sizeof(control)); | |
65 | ||
66 | iov.iov_base = (uint8_t *)buf; | |
67 | iov.iov_len = len; | |
68 | ||
69 | msgh.msg_iov = &iov; | |
70 | msgh.msg_iovlen = 1; | |
71 | msgh.msg_control = control; | |
72 | msgh.msg_controllen = sizeof(control); | |
73 | ||
74 | cmsg = CMSG_FIRSTHDR(&msgh); | |
75 | cmsg->cmsg_len = CMSG_LEN(fd_size); | |
76 | cmsg->cmsg_level = SOL_SOCKET; | |
77 | cmsg->cmsg_type = SCM_RIGHTS; | |
78 | memcpy(CMSG_DATA(cmsg), fds, fd_size); | |
79 | ||
80 | do { | |
81 | r = sendmsg(fd, &msgh, 0); | |
82 | } while (r < 0 && errno == EINTR); | |
83 | ||
84 | return r; | |
85 | } | |
86 | ||
87 | static int | |
88 | vhost_user_read(int fd, struct vhost_user_msg *msg) | |
89 | { | |
90 | uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION; | |
91 | int ret, sz_hdr = VHOST_USER_HDR_SIZE, sz_payload; | |
92 | ||
93 | ret = recv(fd, (void *)msg, sz_hdr, 0); | |
94 | if (ret < sz_hdr) { | |
95 | PMD_DRV_LOG(ERR, "Failed to recv msg hdr: %d instead of %d.", | |
96 | ret, sz_hdr); | |
97 | goto fail; | |
98 | } | |
99 | ||
100 | /* validate msg flags */ | |
101 | if (msg->flags != (valid_flags)) { | |
102 | PMD_DRV_LOG(ERR, "Failed to recv msg: flags %x instead of %x.", | |
103 | msg->flags, valid_flags); | |
104 | goto fail; | |
105 | } | |
106 | ||
107 | sz_payload = msg->size; | |
9f95a23c TL |
108 | |
109 | if ((size_t)sz_payload > sizeof(msg->payload)) | |
110 | goto fail; | |
111 | ||
7c673cae FG |
112 | if (sz_payload) { |
113 | ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0); | |
114 | if (ret < sz_payload) { | |
115 | PMD_DRV_LOG(ERR, | |
116 | "Failed to recv msg payload: %d instead of %d.", | |
117 | ret, msg->size); | |
118 | goto fail; | |
119 | } | |
120 | } | |
121 | ||
122 | return 0; | |
123 | ||
124 | fail: | |
125 | return -1; | |
126 | } | |
127 | ||
9f95a23c TL |
/* State handed to update_memory_region() on every step of the memseg walk. */
struct walk_arg {
	struct vhost_memory *vm;	/* memory table being filled in */
	int *fds;			/* fds[i] is the descriptor of vm->regions[i] */
	int region_nr;			/* number of regions filled in so far */
};
133 | ||
7c673cae | 134 | static int |
9f95a23c TL |
135 | update_memory_region(const struct rte_memseg_list *msl __rte_unused, |
136 | const struct rte_memseg *ms, void *arg) | |
7c673cae | 137 | { |
9f95a23c TL |
138 | struct walk_arg *wa = arg; |
139 | struct vhost_memory_region *mr; | |
140 | uint64_t start_addr, end_addr; | |
141 | size_t offset; | |
142 | int i, fd; | |
143 | ||
144 | fd = rte_memseg_get_fd_thread_unsafe(ms); | |
145 | if (fd < 0) { | |
146 | PMD_DRV_LOG(ERR, "Failed to get fd, ms=%p rte_errno=%d", | |
147 | ms, rte_errno); | |
7c673cae FG |
148 | return -1; |
149 | } | |
150 | ||
9f95a23c TL |
151 | if (rte_memseg_get_fd_offset_thread_unsafe(ms, &offset) < 0) { |
152 | PMD_DRV_LOG(ERR, "Failed to get offset, ms=%p rte_errno=%d", | |
153 | ms, rte_errno); | |
154 | return -1; | |
155 | } | |
7c673cae | 156 | |
9f95a23c TL |
157 | start_addr = (uint64_t)(uintptr_t)ms->addr; |
158 | end_addr = start_addr + ms->len; | |
7c673cae | 159 | |
9f95a23c TL |
160 | for (i = 0; i < wa->region_nr; i++) { |
161 | if (wa->fds[i] != fd) | |
7c673cae FG |
162 | continue; |
163 | ||
9f95a23c TL |
164 | mr = &wa->vm->regions[i]; |
165 | ||
166 | if (mr->userspace_addr + mr->memory_size < end_addr) | |
167 | mr->memory_size = end_addr - mr->userspace_addr; | |
7c673cae | 168 | |
9f95a23c TL |
169 | if (mr->userspace_addr > start_addr) { |
170 | mr->userspace_addr = start_addr; | |
171 | mr->guest_phys_addr = start_addr; | |
7c673cae | 172 | } |
9f95a23c TL |
173 | |
174 | if (mr->mmap_offset > offset) | |
175 | mr->mmap_offset = offset; | |
176 | ||
177 | PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64 | |
178 | " addr=0x%" PRIx64 " len=%" PRIu64, i, fd, | |
179 | mr->mmap_offset, mr->userspace_addr, | |
180 | mr->memory_size); | |
181 | ||
182 | return 0; | |
7c673cae FG |
183 | } |
184 | ||
9f95a23c TL |
185 | if (i >= VHOST_MEMORY_MAX_NREGIONS) { |
186 | PMD_DRV_LOG(ERR, "Too many memory regions"); | |
187 | return -1; | |
188 | } | |
7c673cae | 189 | |
9f95a23c TL |
190 | mr = &wa->vm->regions[i]; |
191 | wa->fds[i] = fd; | |
192 | ||
193 | mr->guest_phys_addr = start_addr; | |
194 | mr->userspace_addr = start_addr; | |
195 | mr->memory_size = ms->len; | |
196 | mr->mmap_offset = offset; | |
197 | ||
198 | PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64 | |
199 | " addr=0x%" PRIx64 " len=%" PRIu64, i, fd, | |
200 | mr->mmap_offset, mr->userspace_addr, | |
201 | mr->memory_size); | |
202 | ||
203 | wa->region_nr++; | |
204 | ||
205 | return 0; | |
7c673cae FG |
206 | } |
207 | ||
208 | static int | |
209 | prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[]) | |
210 | { | |
9f95a23c | 211 | struct walk_arg wa; |
7c673cae | 212 | |
9f95a23c TL |
213 | wa.region_nr = 0; |
214 | wa.vm = &msg->payload.memory; | |
215 | wa.fds = fds; | |
7c673cae | 216 | |
9f95a23c TL |
217 | /* |
218 | * The memory lock has already been taken by memory subsystem | |
219 | * or virtio_user_start_device(). | |
220 | */ | |
221 | if (rte_memseg_walk_thread_unsafe(update_memory_region, &wa) < 0) | |
222 | return -1; | |
7c673cae | 223 | |
9f95a23c | 224 | msg->payload.memory.nregions = wa.region_nr; |
7c673cae FG |
225 | msg->payload.memory.padding = 0; |
226 | ||
227 | return 0; | |
228 | } | |
229 | ||
230 | static struct vhost_user_msg m; | |
231 | ||
11fdf7f2 TL |
232 | const char * const vhost_msg_strings[] = { |
233 | [VHOST_USER_SET_OWNER] = "VHOST_SET_OWNER", | |
234 | [VHOST_USER_RESET_OWNER] = "VHOST_RESET_OWNER", | |
235 | [VHOST_USER_SET_FEATURES] = "VHOST_SET_FEATURES", | |
236 | [VHOST_USER_GET_FEATURES] = "VHOST_GET_FEATURES", | |
237 | [VHOST_USER_SET_VRING_CALL] = "VHOST_SET_VRING_CALL", | |
238 | [VHOST_USER_SET_VRING_NUM] = "VHOST_SET_VRING_NUM", | |
239 | [VHOST_USER_SET_VRING_BASE] = "VHOST_SET_VRING_BASE", | |
240 | [VHOST_USER_GET_VRING_BASE] = "VHOST_GET_VRING_BASE", | |
241 | [VHOST_USER_SET_VRING_ADDR] = "VHOST_SET_VRING_ADDR", | |
242 | [VHOST_USER_SET_VRING_KICK] = "VHOST_SET_VRING_KICK", | |
243 | [VHOST_USER_SET_MEM_TABLE] = "VHOST_SET_MEM_TABLE", | |
244 | [VHOST_USER_SET_VRING_ENABLE] = "VHOST_SET_VRING_ENABLE", | |
7c673cae FG |
245 | }; |
246 | ||
11fdf7f2 TL |
247 | static int |
248 | vhost_user_sock(struct virtio_user_dev *dev, | |
249 | enum vhost_user_request req, | |
250 | void *arg) | |
7c673cae FG |
251 | { |
252 | struct vhost_user_msg msg; | |
253 | struct vhost_vring_file *file = 0; | |
254 | int need_reply = 0; | |
255 | int fds[VHOST_MEMORY_MAX_NREGIONS]; | |
256 | int fd_num = 0; | |
9f95a23c | 257 | int len; |
11fdf7f2 | 258 | int vhostfd = dev->vhostfd; |
7c673cae FG |
259 | |
260 | RTE_SET_USED(m); | |
7c673cae FG |
261 | |
262 | PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); | |
263 | ||
9f95a23c TL |
264 | if (dev->is_server && vhostfd < 0) |
265 | return -1; | |
266 | ||
7c673cae FG |
267 | msg.request = req; |
268 | msg.flags = VHOST_USER_VERSION; | |
269 | msg.size = 0; | |
270 | ||
271 | switch (req) { | |
272 | case VHOST_USER_GET_FEATURES: | |
273 | need_reply = 1; | |
274 | break; | |
275 | ||
276 | case VHOST_USER_SET_FEATURES: | |
277 | case VHOST_USER_SET_LOG_BASE: | |
278 | msg.payload.u64 = *((__u64 *)arg); | |
279 | msg.size = sizeof(m.payload.u64); | |
280 | break; | |
281 | ||
282 | case VHOST_USER_SET_OWNER: | |
283 | case VHOST_USER_RESET_OWNER: | |
284 | break; | |
285 | ||
286 | case VHOST_USER_SET_MEM_TABLE: | |
287 | if (prepare_vhost_memory_user(&msg, fds) < 0) | |
288 | return -1; | |
289 | fd_num = msg.payload.memory.nregions; | |
290 | msg.size = sizeof(m.payload.memory.nregions); | |
291 | msg.size += sizeof(m.payload.memory.padding); | |
292 | msg.size += fd_num * sizeof(struct vhost_memory_region); | |
293 | break; | |
294 | ||
295 | case VHOST_USER_SET_LOG_FD: | |
296 | fds[fd_num++] = *((int *)arg); | |
297 | break; | |
298 | ||
299 | case VHOST_USER_SET_VRING_NUM: | |
300 | case VHOST_USER_SET_VRING_BASE: | |
301 | case VHOST_USER_SET_VRING_ENABLE: | |
302 | memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); | |
303 | msg.size = sizeof(m.payload.state); | |
304 | break; | |
305 | ||
306 | case VHOST_USER_GET_VRING_BASE: | |
307 | memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); | |
308 | msg.size = sizeof(m.payload.state); | |
309 | need_reply = 1; | |
310 | break; | |
311 | ||
312 | case VHOST_USER_SET_VRING_ADDR: | |
313 | memcpy(&msg.payload.addr, arg, sizeof(msg.payload.addr)); | |
314 | msg.size = sizeof(m.payload.addr); | |
315 | break; | |
316 | ||
317 | case VHOST_USER_SET_VRING_KICK: | |
318 | case VHOST_USER_SET_VRING_CALL: | |
319 | case VHOST_USER_SET_VRING_ERR: | |
320 | file = arg; | |
321 | msg.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK; | |
322 | msg.size = sizeof(m.payload.u64); | |
323 | if (file->fd > 0) | |
324 | fds[fd_num++] = file->fd; | |
325 | else | |
326 | msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; | |
327 | break; | |
328 | ||
329 | default: | |
330 | PMD_DRV_LOG(ERR, "trying to send unhandled msg type"); | |
331 | return -1; | |
332 | } | |
333 | ||
334 | len = VHOST_USER_HDR_SIZE + msg.size; | |
335 | if (vhost_user_write(vhostfd, &msg, len, fds, fd_num) < 0) { | |
336 | PMD_DRV_LOG(ERR, "%s failed: %s", | |
337 | vhost_msg_strings[req], strerror(errno)); | |
338 | return -1; | |
339 | } | |
340 | ||
7c673cae FG |
341 | if (need_reply) { |
342 | if (vhost_user_read(vhostfd, &msg) < 0) { | |
343 | PMD_DRV_LOG(ERR, "Received msg failed: %s", | |
344 | strerror(errno)); | |
345 | return -1; | |
346 | } | |
347 | ||
348 | if (req != msg.request) { | |
349 | PMD_DRV_LOG(ERR, "Received unexpected msg type"); | |
350 | return -1; | |
351 | } | |
352 | ||
353 | switch (req) { | |
354 | case VHOST_USER_GET_FEATURES: | |
355 | if (msg.size != sizeof(m.payload.u64)) { | |
356 | PMD_DRV_LOG(ERR, "Received bad msg size"); | |
357 | return -1; | |
358 | } | |
359 | *((__u64 *)arg) = msg.payload.u64; | |
360 | break; | |
361 | case VHOST_USER_GET_VRING_BASE: | |
362 | if (msg.size != sizeof(m.payload.state)) { | |
363 | PMD_DRV_LOG(ERR, "Received bad msg size"); | |
364 | return -1; | |
365 | } | |
366 | memcpy(arg, &msg.payload.state, | |
367 | sizeof(struct vhost_vring_state)); | |
368 | break; | |
369 | default: | |
370 | PMD_DRV_LOG(ERR, "Received unexpected msg type"); | |
371 | return -1; | |
372 | } | |
373 | } | |
374 | ||
375 | return 0; | |
376 | } | |
377 | ||
9f95a23c TL |
378 | #define MAX_VIRTIO_USER_BACKLOG 1 |
379 | static int | |
380 | virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un) | |
381 | { | |
382 | int ret; | |
383 | int flag; | |
384 | int fd = dev->listenfd; | |
385 | ||
386 | ret = bind(fd, (struct sockaddr *)un, sizeof(*un)); | |
387 | if (ret < 0) { | |
388 | PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again\n", | |
389 | dev->path, strerror(errno)); | |
390 | return -1; | |
391 | } | |
392 | ret = listen(fd, MAX_VIRTIO_USER_BACKLOG); | |
393 | if (ret < 0) | |
394 | return -1; | |
395 | ||
396 | flag = fcntl(fd, F_GETFL); | |
397 | if (fcntl(fd, F_SETFL, flag | O_NONBLOCK) < 0) { | |
398 | PMD_DRV_LOG(ERR, "fcntl failed, %s", strerror(errno)); | |
399 | return -1; | |
400 | } | |
401 | ||
402 | return 0; | |
403 | } | |
404 | ||
7c673cae FG |
405 | /** |
406 | * Set up environment to talk with a vhost user backend. | |
7c673cae FG |
407 | * |
408 | * @return | |
11fdf7f2 TL |
409 | * - (-1) if fail; |
410 | * - (0) if succeed. | |
7c673cae | 411 | */ |
11fdf7f2 TL |
412 | static int |
413 | vhost_user_setup(struct virtio_user_dev *dev) | |
7c673cae FG |
414 | { |
415 | int fd; | |
416 | int flag; | |
417 | struct sockaddr_un un; | |
418 | ||
419 | fd = socket(AF_UNIX, SOCK_STREAM, 0); | |
420 | if (fd < 0) { | |
421 | PMD_DRV_LOG(ERR, "socket() error, %s", strerror(errno)); | |
422 | return -1; | |
423 | } | |
424 | ||
425 | flag = fcntl(fd, F_GETFD); | |
426 | if (fcntl(fd, F_SETFD, flag | FD_CLOEXEC) < 0) | |
427 | PMD_DRV_LOG(WARNING, "fcntl failed, %s", strerror(errno)); | |
428 | ||
429 | memset(&un, 0, sizeof(un)); | |
430 | un.sun_family = AF_UNIX; | |
9f95a23c TL |
431 | strlcpy(un.sun_path, dev->path, sizeof(un.sun_path)); |
432 | ||
433 | if (dev->is_server) { | |
434 | dev->listenfd = fd; | |
435 | if (virtio_user_start_server(dev, &un) < 0) { | |
436 | PMD_DRV_LOG(ERR, "virtio-user startup fails in server mode"); | |
437 | close(fd); | |
438 | return -1; | |
439 | } | |
440 | dev->vhostfd = -1; | |
441 | } else { | |
442 | if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) { | |
443 | PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno)); | |
444 | close(fd); | |
445 | return -1; | |
446 | } | |
447 | dev->vhostfd = fd; | |
7c673cae FG |
448 | } |
449 | ||
11fdf7f2 | 450 | return 0; |
7c673cae FG |
451 | } |
452 | ||
11fdf7f2 TL |
453 | static int |
454 | vhost_user_enable_queue_pair(struct virtio_user_dev *dev, | |
455 | uint16_t pair_idx, | |
456 | int enable) | |
7c673cae FG |
457 | { |
458 | int i; | |
459 | ||
460 | for (i = 0; i < 2; ++i) { | |
461 | struct vhost_vring_state state = { | |
462 | .index = pair_idx * 2 + i, | |
463 | .num = enable, | |
464 | }; | |
465 | ||
11fdf7f2 | 466 | if (vhost_user_sock(dev, VHOST_USER_SET_VRING_ENABLE, &state)) |
7c673cae FG |
467 | return -1; |
468 | } | |
469 | ||
470 | return 0; | |
471 | } | |
11fdf7f2 | 472 | |
9f95a23c | 473 | struct virtio_user_backend_ops virtio_ops_user = { |
11fdf7f2 TL |
474 | .setup = vhost_user_setup, |
475 | .send_request = vhost_user_sock, | |
476 | .enable_qp = vhost_user_enable_queue_pair | |
477 | }; |