1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2016 Intel Corporation
5 #include <sys/socket.h>
15 #include "virtio_user_dev.h"
17 /* The version of the protocol we support */
18 #define VHOST_USER_VERSION 0x1
20 #define VHOST_MEMORY_MAX_NREGIONS 8
24 struct vhost_memory_region regions
[VHOST_MEMORY_MAX_NREGIONS
];
27 struct vhost_user_msg
{
28 enum vhost_user_request request
;
30 #define VHOST_USER_VERSION_MASK 0x3
31 #define VHOST_USER_REPLY_MASK (0x1 << 2)
33 uint32_t size
; /* the following payload size */
35 #define VHOST_USER_VRING_IDX_MASK 0xff
36 #define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
38 struct vhost_vring_state state
;
39 struct vhost_vring_addr addr
;
40 struct vhost_memory memory
;
42 int fds
[VHOST_MEMORY_MAX_NREGIONS
];
43 } __attribute((packed
));
45 #define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64)
46 #define VHOST_USER_PAYLOAD_SIZE \
47 (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE)
50 vhost_user_write(int fd
, void *buf
, int len
, int *fds
, int fd_num
)
55 size_t fd_size
= fd_num
* sizeof(int);
56 char control
[CMSG_SPACE(fd_size
)];
59 memset(&msgh
, 0, sizeof(msgh
));
60 memset(control
, 0, sizeof(control
));
62 iov
.iov_base
= (uint8_t *)buf
;
67 msgh
.msg_control
= control
;
68 msgh
.msg_controllen
= sizeof(control
);
70 cmsg
= CMSG_FIRSTHDR(&msgh
);
71 cmsg
->cmsg_len
= CMSG_LEN(fd_size
);
72 cmsg
->cmsg_level
= SOL_SOCKET
;
73 cmsg
->cmsg_type
= SCM_RIGHTS
;
74 memcpy(CMSG_DATA(cmsg
), fds
, fd_size
);
77 r
= sendmsg(fd
, &msgh
, 0);
78 } while (r
< 0 && errno
== EINTR
);
84 vhost_user_read(int fd
, struct vhost_user_msg
*msg
)
86 uint32_t valid_flags
= VHOST_USER_REPLY_MASK
| VHOST_USER_VERSION
;
87 int ret
, sz_hdr
= VHOST_USER_HDR_SIZE
, sz_payload
;
89 ret
= recv(fd
, (void *)msg
, sz_hdr
, 0);
91 PMD_DRV_LOG(ERR
, "Failed to recv msg hdr: %d instead of %d.",
96 /* validate msg flags */
97 if (msg
->flags
!= (valid_flags
)) {
98 PMD_DRV_LOG(ERR
, "Failed to recv msg: flags %x instead of %x.",
99 msg
->flags
, valid_flags
);
103 sz_payload
= msg
->size
;
105 if ((size_t)sz_payload
> sizeof(msg
->payload
))
109 ret
= recv(fd
, (void *)((char *)msg
+ sz_hdr
), sz_payload
, 0);
110 if (ret
< sz_payload
) {
112 "Failed to recv msg payload: %d instead of %d.",
124 struct hugepage_file_info
{
125 uint64_t addr
; /**< virtual addr */
126 size_t size
; /**< the file size */
127 char path
[PATH_MAX
]; /**< path to backing file */
130 /* Two possible options:
131 * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file
132 * array. This is simple but cannot be used in secondary process because
133 * secondary process will close and munmap that file.
134 * 2. Match HUGEFILE_FMT to find hugepage files directly.
136 * We choose option 2.
139 get_hugepage_file_info(struct hugepage_file_info huges
[], int max
)
143 char buf
[BUFSIZ
], *tmp
, *tail
;
144 char *str_underline
, *str_start
;
146 uint64_t v_start
, v_end
;
149 f
= fopen("/proc/self/maps", "r");
151 PMD_DRV_LOG(ERR
, "cannot open /proc/self/maps");
156 while (fgets(buf
, sizeof(buf
), f
) != NULL
) {
157 if (sscanf(buf
, "%" PRIx64
"-%" PRIx64
, &v_start
, &v_end
) < 2) {
158 PMD_DRV_LOG(ERR
, "Failed to parse address");
162 tmp
= strchr(buf
, ' ') + 1; /** skip address */
163 tmp
= strchr(tmp
, ' ') + 1; /** skip perm */
164 tmp
= strchr(tmp
, ' ') + 1; /** skip offset */
165 tmp
= strchr(tmp
, ' ') + 1; /** skip dev */
166 tmp
= strchr(tmp
, ' ') + 1; /** skip inode */
167 while (*tmp
== ' ') /** skip spaces */
169 tail
= strrchr(tmp
, '\n'); /** remove newline if exists */
173 /* Match HUGEFILE_FMT, aka "%s/%smap_%d",
174 * which is defined in eal_filesystem.h
176 str_underline
= strrchr(tmp
, '_');
180 str_start
= str_underline
- strlen("map");
184 if (sscanf(str_start
, "map_%d", &huge_index
) != 1)
187 /* skip duplicated file which is mapped to different regions */
188 for (k
= 0, exist
= -1; k
< idx
; ++k
) {
189 if (!strcmp(huges
[k
].path
, tmp
)) {
198 PMD_DRV_LOG(ERR
, "Exceed maximum of %d", max
);
202 huges
[idx
].addr
= v_start
;
203 huges
[idx
].size
= v_end
- v_start
; /* To be corrected later */
204 snprintf(huges
[idx
].path
, PATH_MAX
, "%s", tmp
);
208 /* correct the size for files who have many regions */
209 for (k
= 0; k
< idx
; ++k
) {
210 if (stat(huges
[k
].path
, &stats
) < 0) {
211 PMD_DRV_LOG(ERR
, "Failed to stat %s, %s\n",
212 huges
[k
].path
, strerror(errno
));
215 huges
[k
].size
= stats
.st_size
;
216 PMD_DRV_LOG(INFO
, "file %s, size %zx\n",
217 huges
[k
].path
, huges
[k
].size
);
229 prepare_vhost_memory_user(struct vhost_user_msg
*msg
, int fds
[])
232 struct hugepage_file_info huges
[VHOST_MEMORY_MAX_NREGIONS
];
233 struct vhost_memory_region
*mr
;
235 num
= get_hugepage_file_info(huges
, VHOST_MEMORY_MAX_NREGIONS
);
237 PMD_INIT_LOG(ERR
, "Failed to prepare memory for vhost-user");
241 for (i
= 0; i
< num
; ++i
) {
242 mr
= &msg
->payload
.memory
.regions
[i
];
243 mr
->guest_phys_addr
= huges
[i
].addr
; /* use vaddr! */
244 mr
->userspace_addr
= huges
[i
].addr
;
245 mr
->memory_size
= huges
[i
].size
;
247 fds
[i
] = open(huges
[i
].path
, O_RDWR
);
250 msg
->payload
.memory
.nregions
= num
;
251 msg
->payload
.memory
.padding
= 0;
256 static struct vhost_user_msg m
;
258 const char * const vhost_msg_strings
[] = {
259 [VHOST_USER_SET_OWNER
] = "VHOST_SET_OWNER",
260 [VHOST_USER_RESET_OWNER
] = "VHOST_RESET_OWNER",
261 [VHOST_USER_SET_FEATURES
] = "VHOST_SET_FEATURES",
262 [VHOST_USER_GET_FEATURES
] = "VHOST_GET_FEATURES",
263 [VHOST_USER_SET_VRING_CALL
] = "VHOST_SET_VRING_CALL",
264 [VHOST_USER_SET_VRING_NUM
] = "VHOST_SET_VRING_NUM",
265 [VHOST_USER_SET_VRING_BASE
] = "VHOST_SET_VRING_BASE",
266 [VHOST_USER_GET_VRING_BASE
] = "VHOST_GET_VRING_BASE",
267 [VHOST_USER_SET_VRING_ADDR
] = "VHOST_SET_VRING_ADDR",
268 [VHOST_USER_SET_VRING_KICK
] = "VHOST_SET_VRING_KICK",
269 [VHOST_USER_SET_MEM_TABLE
] = "VHOST_SET_MEM_TABLE",
270 [VHOST_USER_SET_VRING_ENABLE
] = "VHOST_SET_VRING_ENABLE",
274 vhost_user_sock(struct virtio_user_dev
*dev
,
275 enum vhost_user_request req
,
278 struct vhost_user_msg msg
;
279 struct vhost_vring_file
*file
= 0;
281 int fds
[VHOST_MEMORY_MAX_NREGIONS
];
284 int vhostfd
= dev
->vhostfd
;
288 PMD_DRV_LOG(INFO
, "%s", vhost_msg_strings
[req
]);
290 if (dev
->is_server
&& vhostfd
< 0)
294 msg
.flags
= VHOST_USER_VERSION
;
298 case VHOST_USER_GET_FEATURES
:
302 case VHOST_USER_SET_FEATURES
:
303 case VHOST_USER_SET_LOG_BASE
:
304 msg
.payload
.u64
= *((__u64
*)arg
);
305 msg
.size
= sizeof(m
.payload
.u64
);
308 case VHOST_USER_SET_OWNER
:
309 case VHOST_USER_RESET_OWNER
:
312 case VHOST_USER_SET_MEM_TABLE
:
313 if (prepare_vhost_memory_user(&msg
, fds
) < 0)
315 fd_num
= msg
.payload
.memory
.nregions
;
316 msg
.size
= sizeof(m
.payload
.memory
.nregions
);
317 msg
.size
+= sizeof(m
.payload
.memory
.padding
);
318 msg
.size
+= fd_num
* sizeof(struct vhost_memory_region
);
321 case VHOST_USER_SET_LOG_FD
:
322 fds
[fd_num
++] = *((int *)arg
);
325 case VHOST_USER_SET_VRING_NUM
:
326 case VHOST_USER_SET_VRING_BASE
:
327 case VHOST_USER_SET_VRING_ENABLE
:
328 memcpy(&msg
.payload
.state
, arg
, sizeof(msg
.payload
.state
));
329 msg
.size
= sizeof(m
.payload
.state
);
332 case VHOST_USER_GET_VRING_BASE
:
333 memcpy(&msg
.payload
.state
, arg
, sizeof(msg
.payload
.state
));
334 msg
.size
= sizeof(m
.payload
.state
);
338 case VHOST_USER_SET_VRING_ADDR
:
339 memcpy(&msg
.payload
.addr
, arg
, sizeof(msg
.payload
.addr
));
340 msg
.size
= sizeof(m
.payload
.addr
);
343 case VHOST_USER_SET_VRING_KICK
:
344 case VHOST_USER_SET_VRING_CALL
:
345 case VHOST_USER_SET_VRING_ERR
:
347 msg
.payload
.u64
= file
->index
& VHOST_USER_VRING_IDX_MASK
;
348 msg
.size
= sizeof(m
.payload
.u64
);
350 fds
[fd_num
++] = file
->fd
;
352 msg
.payload
.u64
|= VHOST_USER_VRING_NOFD_MASK
;
356 PMD_DRV_LOG(ERR
, "trying to send unhandled msg type");
360 len
= VHOST_USER_HDR_SIZE
+ msg
.size
;
361 if (vhost_user_write(vhostfd
, &msg
, len
, fds
, fd_num
) < 0) {
362 PMD_DRV_LOG(ERR
, "%s failed: %s",
363 vhost_msg_strings
[req
], strerror(errno
));
367 if (req
== VHOST_USER_SET_MEM_TABLE
)
368 for (i
= 0; i
< fd_num
; ++i
)
372 if (vhost_user_read(vhostfd
, &msg
) < 0) {
373 PMD_DRV_LOG(ERR
, "Received msg failed: %s",
378 if (req
!= msg
.request
) {
379 PMD_DRV_LOG(ERR
, "Received unexpected msg type");
384 case VHOST_USER_GET_FEATURES
:
385 if (msg
.size
!= sizeof(m
.payload
.u64
)) {
386 PMD_DRV_LOG(ERR
, "Received bad msg size");
389 *((__u64
*)arg
) = msg
.payload
.u64
;
391 case VHOST_USER_GET_VRING_BASE
:
392 if (msg
.size
!= sizeof(m
.payload
.state
)) {
393 PMD_DRV_LOG(ERR
, "Received bad msg size");
396 memcpy(arg
, &msg
.payload
.state
,
397 sizeof(struct vhost_vring_state
));
400 PMD_DRV_LOG(ERR
, "Received unexpected msg type");
408 #define MAX_VIRTIO_USER_BACKLOG 1
410 virtio_user_start_server(struct virtio_user_dev
*dev
, struct sockaddr_un
*un
)
414 int fd
= dev
->listenfd
;
416 ret
= bind(fd
, (struct sockaddr
*)un
, sizeof(*un
));
418 PMD_DRV_LOG(ERR
, "failed to bind to %s: %s; remove it and try again\n",
419 dev
->path
, strerror(errno
));
422 ret
= listen(fd
, MAX_VIRTIO_USER_BACKLOG
);
426 flag
= fcntl(fd
, F_GETFL
);
427 fcntl(fd
, F_SETFL
, flag
| O_NONBLOCK
);
433 * Set up environment to talk with a vhost user backend.
440 vhost_user_setup(struct virtio_user_dev
*dev
)
444 struct sockaddr_un un
;
446 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
448 PMD_DRV_LOG(ERR
, "socket() error, %s", strerror(errno
));
452 flag
= fcntl(fd
, F_GETFD
);
453 if (fcntl(fd
, F_SETFD
, flag
| FD_CLOEXEC
) < 0)
454 PMD_DRV_LOG(WARNING
, "fcntl failed, %s", strerror(errno
));
456 memset(&un
, 0, sizeof(un
));
457 un
.sun_family
= AF_UNIX
;
458 snprintf(un
.sun_path
, sizeof(un
.sun_path
), "%s", dev
->path
);
460 if (dev
->is_server
) {
462 if (virtio_user_start_server(dev
, &un
) < 0) {
463 PMD_DRV_LOG(ERR
, "virtio-user startup fails in server mode");
469 if (connect(fd
, (struct sockaddr
*)&un
, sizeof(un
)) < 0) {
470 PMD_DRV_LOG(ERR
, "connect error, %s", strerror(errno
));
481 vhost_user_enable_queue_pair(struct virtio_user_dev
*dev
,
487 for (i
= 0; i
< 2; ++i
) {
488 struct vhost_vring_state state
= {
489 .index
= pair_idx
* 2 + i
,
493 if (vhost_user_sock(dev
, VHOST_USER_SET_VRING_ENABLE
, &state
))
500 struct virtio_user_backend_ops ops_user
= {
501 .setup
= vhost_user_setup
,
502 .send_request
= vhost_user_sock
,
503 .enable_qp
= vhost_user_enable_queue_pair