/*-
 *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/un.h>
#include <unistd.h>
/**
 * Send a buffer over a socket, optionally attaching file descriptors to it
 * as SCM_RIGHTS ancillary data.
 *
 * @param fd
 *   Socket to send on.
 * @param buf
 *   Start of the bytes to send.
 * @param len
 *   Number of bytes to send from buf.
 * @param fds
 *   Descriptors to attach; only read when fd_num > 0.
 * @param fd_num
 *   Number of entries in fds.
 *
 * @return
 *   The value returned by sendmsg() (retried while it fails with EINTR),
 *   or -1 with errno set to EINVAL when the arguments are inconsistent.
 */
static int
vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num)
{
	int r;
	struct msghdr msgh;
	struct iovec iov;
	struct cmsghdr *cmsg;
	size_t fd_size;

	/*
	 * Reject negative counts before they are converted to size_t: a
	 * wrapped value would size the control buffer below.
	 */
	if (len < 0 || fd_num < 0 || (fd_num > 0 && fds == NULL)) {
		errno = EINVAL;
		return -1;
	}

	fd_size = (size_t)fd_num * sizeof(int);
	char control[CMSG_SPACE(fd_size)];

	memset(&msgh, 0, sizeof(msgh));
	memset(control, 0, sizeof(control));

	iov.iov_base = (uint8_t *)buf;
	iov.iov_len = (size_t)len;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;

	/* Build the ancillary header only when there is a descriptor to pass. */
	if (fd_num > 0) {
		msgh.msg_control = control;
		msgh.msg_controllen = sizeof(control);

		cmsg = CMSG_FIRSTHDR(&msgh);
		cmsg->cmsg_len = CMSG_LEN(fd_size);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_RIGHTS;
		memcpy(CMSG_DATA(cmsg), fds, fd_size);
	}

	do {
		r = sendmsg(fd, &msgh, 0);
	} while (r < 0 && errno == EINTR);

	return r;
}
80 vhost_user_read(int fd
, struct vhost_user_msg
*msg
)
82 uint32_t valid_flags
= VHOST_USER_REPLY_MASK
| VHOST_USER_VERSION
;
83 int ret
, sz_hdr
= VHOST_USER_HDR_SIZE
, sz_payload
;
85 ret
= recv(fd
, (void *)msg
, sz_hdr
, 0);
87 PMD_DRV_LOG(ERR
, "Failed to recv msg hdr: %d instead of %d.",
92 /* validate msg flags */
93 if (msg
->flags
!= (valid_flags
)) {
94 PMD_DRV_LOG(ERR
, "Failed to recv msg: flags %x instead of %x.",
95 msg
->flags
, valid_flags
);
99 sz_payload
= msg
->size
;
101 ret
= recv(fd
, (void *)((char *)msg
+ sz_hdr
), sz_payload
, 0);
102 if (ret
< sz_payload
) {
104 "Failed to recv msg payload: %d instead of %d.",
/** One file-backed mapping found in the address space of this process. */
struct hugepage_file_info {
	uint64_t addr;            /**< virtual addr */
	size_t   size;            /**< the file size */
	char     path[PATH_MAX];  /**< path to backing file */
};
122 /* Two possible options:
123 * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file
124 * array. This is simple but cannot be used in secondary process because
125 * secondary process will close and munmap that file.
126 * 2. Match HUGEFILE_FMT to find hugepage files directly.
128 * We choose option 2.
131 get_hugepage_file_info(struct hugepage_file_info huges
[], int max
)
135 char buf
[BUFSIZ
], *tmp
, *tail
;
136 char *str_underline
, *str_start
;
138 uint64_t v_start
, v_end
;
140 f
= fopen("/proc/self/maps", "r");
142 PMD_DRV_LOG(ERR
, "cannot open /proc/self/maps");
147 while (fgets(buf
, sizeof(buf
), f
) != NULL
) {
148 if (sscanf(buf
, "%" PRIx64
"-%" PRIx64
, &v_start
, &v_end
) < 2) {
149 PMD_DRV_LOG(ERR
, "Failed to parse address");
153 tmp
= strchr(buf
, ' ') + 1; /** skip address */
154 tmp
= strchr(tmp
, ' ') + 1; /** skip perm */
155 tmp
= strchr(tmp
, ' ') + 1; /** skip offset */
156 tmp
= strchr(tmp
, ' ') + 1; /** skip dev */
157 tmp
= strchr(tmp
, ' ') + 1; /** skip inode */
158 while (*tmp
== ' ') /** skip spaces */
160 tail
= strrchr(tmp
, '\n'); /** remove newline if exists */
164 /* Match HUGEFILE_FMT, aka "%s/%smap_%d",
165 * which is defined in eal_filesystem.h
167 str_underline
= strrchr(tmp
, '_');
171 str_start
= str_underline
- strlen("map");
175 if (sscanf(str_start
, "map_%d", &huge_index
) != 1)
179 PMD_DRV_LOG(ERR
, "Exceed maximum of %d", max
);
182 huges
[idx
].addr
= v_start
;
183 huges
[idx
].size
= v_end
- v_start
;
184 snprintf(huges
[idx
].path
, PATH_MAX
, "%s", tmp
);
197 prepare_vhost_memory_user(struct vhost_user_msg
*msg
, int fds
[])
200 struct hugepage_file_info huges
[VHOST_MEMORY_MAX_NREGIONS
];
201 struct vhost_memory_region
*mr
;
203 num
= get_hugepage_file_info(huges
, VHOST_MEMORY_MAX_NREGIONS
);
205 PMD_INIT_LOG(ERR
, "Failed to prepare memory for vhost-user");
209 for (i
= 0; i
< num
; ++i
) {
210 mr
= &msg
->payload
.memory
.regions
[i
];
211 mr
->guest_phys_addr
= huges
[i
].addr
; /* use vaddr! */
212 mr
->userspace_addr
= huges
[i
].addr
;
213 mr
->memory_size
= huges
[i
].size
;
215 fds
[i
] = open(huges
[i
].path
, O_RDWR
);
218 msg
->payload
.memory
.nregions
= num
;
219 msg
->payload
.memory
.padding
= 0;
224 static struct vhost_user_msg m
;
226 static const char * const vhost_msg_strings
[] = {
227 [VHOST_USER_SET_OWNER
] = "VHOST_USER_SET_OWNER",
228 [VHOST_USER_RESET_OWNER
] = "VHOST_USER_RESET_OWNER",
229 [VHOST_USER_SET_FEATURES
] = "VHOST_USER_SET_FEATURES",
230 [VHOST_USER_GET_FEATURES
] = "VHOST_USER_GET_FEATURES",
231 [VHOST_USER_SET_VRING_CALL
] = "VHOST_USER_SET_VRING_CALL",
232 [VHOST_USER_SET_VRING_NUM
] = "VHOST_USER_SET_VRING_NUM",
233 [VHOST_USER_SET_VRING_BASE
] = "VHOST_USER_SET_VRING_BASE",
234 [VHOST_USER_GET_VRING_BASE
] = "VHOST_USER_GET_VRING_BASE",
235 [VHOST_USER_SET_VRING_ADDR
] = "VHOST_USER_SET_VRING_ADDR",
236 [VHOST_USER_SET_VRING_KICK
] = "VHOST_USER_SET_VRING_KICK",
237 [VHOST_USER_SET_MEM_TABLE
] = "VHOST_USER_SET_MEM_TABLE",
238 [VHOST_USER_SET_VRING_ENABLE
] = "VHOST_USER_SET_VRING_ENABLE",
243 vhost_user_sock(int vhostfd
, enum vhost_user_request req
, void *arg
)
245 struct vhost_user_msg msg
;
246 struct vhost_vring_file
*file
= 0;
248 int fds
[VHOST_MEMORY_MAX_NREGIONS
];
253 RTE_SET_USED(vhost_msg_strings
);
255 PMD_DRV_LOG(INFO
, "%s", vhost_msg_strings
[req
]);
258 msg
.flags
= VHOST_USER_VERSION
;
262 case VHOST_USER_GET_FEATURES
:
266 case VHOST_USER_SET_FEATURES
:
267 case VHOST_USER_SET_LOG_BASE
:
268 msg
.payload
.u64
= *((__u64
*)arg
);
269 msg
.size
= sizeof(m
.payload
.u64
);
272 case VHOST_USER_SET_OWNER
:
273 case VHOST_USER_RESET_OWNER
:
276 case VHOST_USER_SET_MEM_TABLE
:
277 if (prepare_vhost_memory_user(&msg
, fds
) < 0)
279 fd_num
= msg
.payload
.memory
.nregions
;
280 msg
.size
= sizeof(m
.payload
.memory
.nregions
);
281 msg
.size
+= sizeof(m
.payload
.memory
.padding
);
282 msg
.size
+= fd_num
* sizeof(struct vhost_memory_region
);
285 case VHOST_USER_SET_LOG_FD
:
286 fds
[fd_num
++] = *((int *)arg
);
289 case VHOST_USER_SET_VRING_NUM
:
290 case VHOST_USER_SET_VRING_BASE
:
291 case VHOST_USER_SET_VRING_ENABLE
:
292 memcpy(&msg
.payload
.state
, arg
, sizeof(msg
.payload
.state
));
293 msg
.size
= sizeof(m
.payload
.state
);
296 case VHOST_USER_GET_VRING_BASE
:
297 memcpy(&msg
.payload
.state
, arg
, sizeof(msg
.payload
.state
));
298 msg
.size
= sizeof(m
.payload
.state
);
302 case VHOST_USER_SET_VRING_ADDR
:
303 memcpy(&msg
.payload
.addr
, arg
, sizeof(msg
.payload
.addr
));
304 msg
.size
= sizeof(m
.payload
.addr
);
307 case VHOST_USER_SET_VRING_KICK
:
308 case VHOST_USER_SET_VRING_CALL
:
309 case VHOST_USER_SET_VRING_ERR
:
311 msg
.payload
.u64
= file
->index
& VHOST_USER_VRING_IDX_MASK
;
312 msg
.size
= sizeof(m
.payload
.u64
);
314 fds
[fd_num
++] = file
->fd
;
316 msg
.payload
.u64
|= VHOST_USER_VRING_NOFD_MASK
;
320 PMD_DRV_LOG(ERR
, "trying to send unhandled msg type");
324 len
= VHOST_USER_HDR_SIZE
+ msg
.size
;
325 if (vhost_user_write(vhostfd
, &msg
, len
, fds
, fd_num
) < 0) {
326 PMD_DRV_LOG(ERR
, "%s failed: %s",
327 vhost_msg_strings
[req
], strerror(errno
));
331 if (req
== VHOST_USER_SET_MEM_TABLE
)
332 for (i
= 0; i
< fd_num
; ++i
)
336 if (vhost_user_read(vhostfd
, &msg
) < 0) {
337 PMD_DRV_LOG(ERR
, "Received msg failed: %s",
342 if (req
!= msg
.request
) {
343 PMD_DRV_LOG(ERR
, "Received unexpected msg type");
348 case VHOST_USER_GET_FEATURES
:
349 if (msg
.size
!= sizeof(m
.payload
.u64
)) {
350 PMD_DRV_LOG(ERR
, "Received bad msg size");
353 *((__u64
*)arg
) = msg
.payload
.u64
;
355 case VHOST_USER_GET_VRING_BASE
:
356 if (msg
.size
!= sizeof(m
.payload
.state
)) {
357 PMD_DRV_LOG(ERR
, "Received bad msg size");
360 memcpy(arg
, &msg
.payload
.state
,
361 sizeof(struct vhost_vring_state
));
364 PMD_DRV_LOG(ERR
, "Received unexpected msg type");
373 * Set up environment to talk with a vhost user backend.
375 * - The path to vhost user unix socket file.
378 * - (-1) if fail to set up;
379 * - (>=0) if successful, and it is the fd to vhostfd.
382 vhost_user_setup(const char *path
)
386 struct sockaddr_un un
;
388 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
390 PMD_DRV_LOG(ERR
, "socket() error, %s", strerror(errno
));
394 flag
= fcntl(fd
, F_GETFD
);
395 if (fcntl(fd
, F_SETFD
, flag
| FD_CLOEXEC
) < 0)
396 PMD_DRV_LOG(WARNING
, "fcntl failed, %s", strerror(errno
));
398 memset(&un
, 0, sizeof(un
));
399 un
.sun_family
= AF_UNIX
;
400 snprintf(un
.sun_path
, sizeof(un
.sun_path
), "%s", path
);
401 if (connect(fd
, (struct sockaddr
*)&un
, sizeof(un
)) < 0) {
402 PMD_DRV_LOG(ERR
, "connect error, %s", strerror(errno
));
411 vhost_user_enable_queue_pair(int vhostfd
, uint16_t pair_idx
, int enable
)
415 for (i
= 0; i
< 2; ++i
) {
416 struct vhost_vring_state state
= {
417 .index
= pair_idx
* 2 + i
,
421 if (vhost_user_sock(vhostfd
,
422 VHOST_USER_SET_VRING_ENABLE
, &state
))