4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 #include <sys/ioctl.h>
40 #include <rte_memory.h>
41 #include <rte_eal_memconfig.h>
43 #include "eal_filesystem.h"
45 #include "eal_private.h"
49 /* per-process VFIO config */
50 static struct vfio_config vfio_cfg
;
/*
 * DMA mapping callbacks, declared ahead of their definitions so that the
 * iommu_types table below can take their addresses.
 */
52 static int vfio_type1_dma_map(int);
53 static int vfio_noiommu_dma_map(int);
55 /* IOMMU types we support */
/*
 * Each entry holds, in order, a type id, a printable name and the DMA
 * mapping callback for that type (read elsewhere in this file as
 * type_id, name and dma_map_func). The table is walked from index 0
 * upwards by vfio_set_iommu_type and vfio_has_supported_extensions.
 */
56 static const struct vfio_iommu_type iommu_types
[] = {
57 /* x86 IOMMU, otherwise known as type 1 */
58 { RTE_VFIO_TYPE1
, "Type 1", &vfio_type1_dma_map
},
/* No-IOMMU mode, paired with a callback that performs no mapping */
60 { RTE_VFIO_NOIOMMU
, "No-IOMMU", &vfio_noiommu_dma_map
},
/*
 * Return the VFIO group file descriptor for IOMMU group iommu_group_no.
 *
 * Descriptors already recorded in vfio_cfg.vfio_groups (slots below
 * vfio_cfg.vfio_group_idx) are reused. Otherwise a primary process opens
 * the group node, trying VFIO_GROUP_FMT first and VFIO_NOIOMMU_GROUP_FMT
 * second, and records the new descriptor in the slot at
 * vfio_cfg.vfio_group_idx; the open is refused once that index equals
 * VFIO_MAX_GROUPS. An open failure is only logged when errno is not ENOENT.
 * A secondary process asks the primary for the descriptor over the
 * vfio_mp_sync socket (SOCKET_REQ_GROUP followed by the group number).
 *
 * NOTE(review): most return statements of this function are missing from
 * this listing. vfio_setup_device treats a negative result as failure and
 * 0 as "device not managed by VFIO" -- confirm each failure path.
 * NOTE(review): no increment of vfio_cfg.vfio_group_idx is visible here;
 * in the visible code the index is only advanced by vfio_setup_device.
 * NOTE(review): two log messages on the secondary-process path mention a
 * container fd although a group fd is being requested -- looks like a
 * copy/paste slip, confirm before changing the text.
 */
64 vfio_get_group_fd(int iommu_group_no
)
68 char filename
[PATH_MAX
];
70 /* check if we already have the group descriptor open */
71 for (i
= 0; i
< vfio_cfg
.vfio_group_idx
; i
++)
72 if (vfio_cfg
.vfio_groups
[i
].group_no
== iommu_group_no
)
73 return vfio_cfg
.vfio_groups
[i
].fd
;
75 /* if primary, try to open the group */
76 if (internal_config
.process_type
== RTE_PROC_PRIMARY
) {
77 /* try regular group format */
78 snprintf(filename
, sizeof(filename
),
79 VFIO_GROUP_FMT
, iommu_group_no
);
80 vfio_group_fd
= open(filename
, O_RDWR
);
81 if (vfio_group_fd
< 0) {
82 /* if file not found, it's not an error */
83 if (errno
!= ENOENT
) {
84 RTE_LOG(ERR
, EAL
, "Cannot open %s: %s\n", filename
,
89 /* special case: try no-IOMMU path as well */
90 snprintf(filename
, sizeof(filename
),
91 VFIO_NOIOMMU_GROUP_FMT
, iommu_group_no
);
92 vfio_group_fd
= open(filename
, O_RDWR
);
93 if (vfio_group_fd
< 0) {
94 if (errno
!= ENOENT
) {
95 RTE_LOG(ERR
, EAL
, "Cannot open %s: %s\n", filename
,
101 /* noiommu group found */
104 /* if the fd is valid, create a new group for it */
105 if (vfio_cfg
.vfio_group_idx
== VFIO_MAX_GROUPS
) {
106 RTE_LOG(ERR
, EAL
, "Maximum number of VFIO groups reached!\n");
107 close(vfio_group_fd
);
110 vfio_cfg
.vfio_groups
[vfio_cfg
.vfio_group_idx
].group_no
= iommu_group_no
;
111 vfio_cfg
.vfio_groups
[vfio_cfg
.vfio_group_idx
].fd
= vfio_group_fd
;
112 return vfio_group_fd
;
114 /* if we're in a secondary process, request group fd from the primary
115 * process via our socket
120 socket_fd
= vfio_mp_sync_connect_to_primary();
123 RTE_LOG(ERR
, EAL
, " cannot connect to primary process!\n");
126 if (vfio_mp_sync_send_request(socket_fd
, SOCKET_REQ_GROUP
) < 0) {
127 RTE_LOG(ERR
, EAL
, " cannot request container fd!\n");
131 if (vfio_mp_sync_send_request(socket_fd
, iommu_group_no
) < 0) {
132 RTE_LOG(ERR
, EAL
, " cannot send group number!\n");
136 ret
= vfio_mp_sync_receive_request(socket_fd
);
142 vfio_group_fd
= vfio_mp_sync_receive_fd(socket_fd
);
143 /* if we got the fd, return it */
144 if (vfio_group_fd
> 0) {
146 return vfio_group_fd
;
148 /* fall-through on error */
150 RTE_LOG(ERR
, EAL
, " cannot get container fd!\n");
159 clear_current_group(void)
161 vfio_cfg
.vfio_groups
[vfio_cfg
.vfio_group_idx
].group_no
= 0;
162 vfio_cfg
.vfio_groups
[vfio_cfg
.vfio_group_idx
].fd
= -1;
/*
 * Prepare one device for use through VFIO and hand back its descriptor.
 *
 * sysfs_base:  passed with dev_addr to vfio_get_group_no to find the
 *              IOMMU group of the device.
 * dev_addr:    device name; also the argument of the
 *              VFIO_GROUP_GET_DEVICE_FD ioctl and the prefix of every
 *              log message.
 * vfio_dev_fd: output, receives the result of VFIO_GROUP_GET_DEVICE_FD.
 * device_info: output, filled in by the VFIO_DEVICE_GET_INFO ioctl.
 *
 * Visible steps, in order: resolve the group number, obtain the group fd,
 * query the group status and require VFIO_GROUP_FLAGS_VIABLE, attach the
 * group to vfio_cfg.vfio_container_fd when no container is set yet, then
 * (primary process only, and only while vfio_cfg.vfio_container_has_dma
 * is 0) select an IOMMU type and run its dma_map_func, and finally fetch
 * the device fd and the device info. On the group status and container
 * failures the group fd is closed and the current slot is cleared.
 *
 * NOTE(review): the return statements are missing from this listing, so
 * the exact return codes for "skipped", "failed" and "ok" cannot be
 * stated from here -- confirm against the callers.
 */
165 int vfio_setup_device(const char *sysfs_base
, const char *dev_addr
,
166 int *vfio_dev_fd
, struct vfio_device_info
*device_info
)
168 struct vfio_group_status group_status
= {
169 .argsz
= sizeof(group_status
)
175 /* get group number */
176 ret
= vfio_get_group_no(sysfs_base
, dev_addr
, &iommu_group_no
);
178 RTE_LOG(WARNING
, EAL
, " %s not managed by VFIO driver, skipping\n",
183 /* if negative, something failed */
187 /* get the actual group fd */
188 vfio_group_fd
= vfio_get_group_fd(iommu_group_no
);
189 if (vfio_group_fd
< 0)
/*
 * NOTE(review): in a primary process vfio_get_group_fd has already written
 * these same two fields for a newly opened group; no equivalent store is
 * visible on its secondary-process path, which may be why it is repeated.
 */
193 vfio_cfg
.vfio_groups
[vfio_cfg
.vfio_group_idx
].group_no
= iommu_group_no
;
194 vfio_cfg
.vfio_groups
[vfio_cfg
.vfio_group_idx
].fd
= vfio_group_fd
;
196 /* if group_fd == 0, that means the device isn't managed by VFIO */
197 if (vfio_group_fd
== 0) {
198 RTE_LOG(WARNING
, EAL
, " %s not managed by VFIO driver, skipping\n",
200 /* we store 0 as group fd to distinguish between existing but
201 * unbound VFIO groups, and groups that don't exist at all.
203 vfio_cfg
.vfio_group_idx
++;
208 * at this point, we know that this group is viable (meaning, all devices
209 * are either bound to VFIO or not bound to anything)
212 /* check if the group is viable */
213 ret
= ioctl(vfio_group_fd
, VFIO_GROUP_GET_STATUS
, &group_status
);
215 RTE_LOG(ERR
, EAL
, " %s cannot get group status, "
216 "error %i (%s)\n", dev_addr
, errno
, strerror(errno
));
217 close(vfio_group_fd
);
218 clear_current_group();
220 } else if (!(group_status
.flags
& VFIO_GROUP_FLAGS_VIABLE
)) {
221 RTE_LOG(ERR
, EAL
, " %s VFIO group is not viable!\n", dev_addr
);
222 close(vfio_group_fd
);
223 clear_current_group();
227 /* check if group does not have a container yet */
228 if (!(group_status
.flags
& VFIO_GROUP_FLAGS_CONTAINER_SET
)) {
230 /* add group to a container */
231 ret
= ioctl(vfio_group_fd
, VFIO_GROUP_SET_CONTAINER
,
232 &vfio_cfg
.vfio_container_fd
);
234 RTE_LOG(ERR
, EAL
, " %s cannot add VFIO group to container, "
235 "error %i (%s)\n", dev_addr
, errno
, strerror(errno
));
236 close(vfio_group_fd
);
237 clear_current_group();
241 * at this point we know that this group has been successfully
242 * initialized, so we increment vfio_group_idx to indicate that we can
245 vfio_cfg
.vfio_group_idx
++;
249 * pick an IOMMU type and set up DMA mappings for container
251 * needs to be done only once, only when at least one group is assigned to
252 * a container and only in primary process
254 if (internal_config
.process_type
== RTE_PROC_PRIMARY
&&
255 vfio_cfg
.vfio_container_has_dma
== 0) {
256 /* select an IOMMU type which we will be using */
257 const struct vfio_iommu_type
*t
=
258 vfio_set_iommu_type(vfio_cfg
.vfio_container_fd
);
260 RTE_LOG(ERR
, EAL
, " %s failed to select IOMMU type\n", dev_addr
);
263 ret
= t
->dma_map_func(vfio_cfg
.vfio_container_fd
);
265 RTE_LOG(ERR
, EAL
, " %s DMA remapping failed, "
266 "error %i (%s)\n", dev_addr
, errno
, strerror(errno
));
269 vfio_cfg
.vfio_container_has_dma
= 1;
272 /* get a file descriptor for the device */
273 *vfio_dev_fd
= ioctl(vfio_group_fd
, VFIO_GROUP_GET_DEVICE_FD
, dev_addr
);
274 if (*vfio_dev_fd
< 0) {
275 /* if we cannot get a device fd, this simply means that this
276 * particular port is not bound to VFIO
278 RTE_LOG(WARNING
, EAL
, " %s not managed by VFIO driver, skipping\n",
283 /* test and setup the device */
284 ret
= ioctl(*vfio_dev_fd
, VFIO_DEVICE_GET_INFO
, device_info
);
286 RTE_LOG(ERR
, EAL
, " %s cannot get device info, "
287 "error %i (%s)\n", dev_addr
, errno
, strerror(errno
));
/*
 * Probe for VFIO support and initialise the per-process VFIO state.
 *
 * modname: kernel module name handed to rte_eal_check_module.
 *
 * Every slot of vfio_cfg.vfio_groups is first reset to fd -1 / group_no -1.
 * The module check result is then tested for -1 (could not read module
 * details) and for 0 (module not loaded). Otherwise the container fd is
 * obtained with vfio_get_container_fd and, when it differs from -1,
 * vfio_cfg.vfio_enabled is set to 1.
 *
 * NOTE(review): the declarations and return statements are missing from
 * this listing; the existing comments indicate an error return for the -1
 * case and a return of 0 when the module is not loaded -- confirm.
 */
296 vfio_enable(const char *modname
)
298 /* initialize group list */
302 for (i
= 0; i
< VFIO_MAX_GROUPS
; i
++) {
303 vfio_cfg
.vfio_groups
[i
].fd
= -1;
304 vfio_cfg
.vfio_groups
[i
].group_no
= -1;
307 /* inform the user that we are probing for VFIO */
308 RTE_LOG(INFO
, EAL
, "Probing VFIO support...\n");
310 /* check if vfio-pci module is loaded */
311 vfio_available
= rte_eal_check_module(modname
);
313 /* return error directly */
314 if (vfio_available
== -1) {
315 RTE_LOG(INFO
, EAL
, "Could not get loaded module details!\n");
319 /* return 0 if VFIO modules not loaded */
320 if (vfio_available
== 0) {
321 RTE_LOG(DEBUG
, EAL
, "VFIO modules not loaded, "
322 "skipping VFIO support...\n");
326 vfio_cfg
.vfio_container_fd
= vfio_get_container_fd();
328 /* check if we have VFIO driver enabled */
329 if (vfio_cfg
.vfio_container_fd
!= -1) {
330 RTE_LOG(NOTICE
, EAL
, "VFIO support initialized\n");
331 vfio_cfg
.vfio_enabled
= 1;
333 RTE_LOG(NOTICE
, EAL
, "VFIO support could not be initialized\n");
340 vfio_is_enabled(const char *modname
)
342 const int mod_available
= rte_eal_check_module(modname
);
343 return vfio_cfg
.vfio_enabled
&& mod_available
;
/*
 * Choose the IOMMU type to use for a container.
 *
 * vfio_container_fd: container descriptor the VFIO_SET_IOMMU ioctl is
 *                    issued on.
 *
 * The entries of iommu_types are tried in array order. A failed attempt is
 * logged at DEBUG level only, because a later entry may still be accepted;
 * the accepted type is announced at NOTICE level.
 *
 * NOTE(review): the ioctl argument, the success test and the return
 * statements are missing from this listing. vfio_setup_device calls
 * dma_map_func through the returned pointer and logs a selection failure
 * otherwise, so presumably the accepted entry is returned and NULL
 * signals failure -- confirm.
 */
346 const struct vfio_iommu_type
*
347 vfio_set_iommu_type(int vfio_container_fd
)
350 for (idx
= 0; idx
< RTE_DIM(iommu_types
); idx
++) {
351 const struct vfio_iommu_type
*t
= &iommu_types
[idx
];
353 int ret
= ioctl(vfio_container_fd
, VFIO_SET_IOMMU
,
356 RTE_LOG(NOTICE
, EAL
, " using IOMMU type %d (%s)\n",
357 t
->type_id
, t
->name
);
360 /* not an error, there may be more supported IOMMU types */
361 RTE_LOG(DEBUG
, EAL
, " set IOMMU type %d (%s) failed, "
362 "error %i (%s)\n", t
->type_id
, t
->name
, errno
,
365 /* if we didn't find a suitable IOMMU type, fail */
/*
 * Check whether the container supports at least one entry of iommu_types.
 *
 * vfio_container_fd: container descriptor queried with the
 *                    VFIO_CHECK_EXTENSION ioctl, once per table entry.
 *
 * An ioctl result of 1 marks the type as supported; every probed type is
 * reported at DEBUG level. n_extensions starts at 0 and is presumably
 * incremented for each supported type (the increment itself is not visible
 * in this listing).
 *
 * Ownership: this function closes vfio_container_fd itself, both when the
 * ioctl fails and when no supported type was found, so the caller must not
 * close it again on those paths.
 *
 * NOTE(review): the return statements are missing from this listing;
 * confirm the return values for failure and success.
 */
370 vfio_has_supported_extensions(int vfio_container_fd
)
373 unsigned idx
, n_extensions
= 0;
374 for (idx
= 0; idx
< RTE_DIM(iommu_types
); idx
++) {
375 const struct vfio_iommu_type
*t
= &iommu_types
[idx
];
377 ret
= ioctl(vfio_container_fd
, VFIO_CHECK_EXTENSION
,
380 RTE_LOG(ERR
, EAL
, " could not get IOMMU type, "
381 "error %i (%s)\n", errno
,
383 close(vfio_container_fd
);
385 } else if (ret
== 1) {
386 /* we found a supported extension */
389 RTE_LOG(DEBUG
, EAL
, " IOMMU type %d (%s) is %s\n",
391 ret
? "supported" : "not supported");
394 /* if we didn't find any supported IOMMU types, fail */
396 close(vfio_container_fd
);
/*
 * Obtain the VFIO container file descriptor for this process.
 *
 * Primary process: opens VFIO_CONTAINER_PATH read/write, requires the
 * VFIO_GET_API_VERSION ioctl to return VFIO_API_VERSION (the descriptor is
 * closed otherwise) and then calls vfio_has_supported_extensions, which
 * closes the descriptor itself when it fails.
 * Secondary process: connects to the primary over the vfio_mp_sync socket,
 * sends SOCKET_REQ_CONTAINER and receives the descriptor.
 *
 * Returns the container descriptor on success.
 *
 * NOTE(review): the failure return statements and any handling of
 * socket_fd are missing from this listing. vfio_enable compares the result
 * against -1, so failures presumably return -1 -- confirm.
 */
404 vfio_get_container_fd(void)
406 int ret
, vfio_container_fd
;
408 /* if we're in a primary process, try to open the container */
409 if (internal_config
.process_type
== RTE_PROC_PRIMARY
) {
410 vfio_container_fd
= open(VFIO_CONTAINER_PATH
, O_RDWR
);
411 if (vfio_container_fd
< 0) {
412 RTE_LOG(ERR
, EAL
, " cannot open VFIO container, "
413 "error %i (%s)\n", errno
, strerror(errno
));
417 /* check VFIO API version */
418 ret
= ioctl(vfio_container_fd
, VFIO_GET_API_VERSION
);
419 if (ret
!= VFIO_API_VERSION
) {
421 RTE_LOG(ERR
, EAL
, " could not get VFIO API version, "
422 "error %i (%s)\n", errno
, strerror(errno
));
424 RTE_LOG(ERR
, EAL
, " unsupported VFIO API version!\n");
425 close(vfio_container_fd
);
429 ret
= vfio_has_supported_extensions(vfio_container_fd
);
431 RTE_LOG(ERR
, EAL
, " no supported IOMMU "
432 "extensions found!\n");
436 return vfio_container_fd
;
439 * if we're in a secondary process, request container fd from the
440 * primary process via our socket
444 socket_fd
= vfio_mp_sync_connect_to_primary();
446 RTE_LOG(ERR
, EAL
, " cannot connect to primary process!\n");
449 if (vfio_mp_sync_send_request(socket_fd
, SOCKET_REQ_CONTAINER
) < 0) {
450 RTE_LOG(ERR
, EAL
, " cannot request container fd!\n");
454 vfio_container_fd
= vfio_mp_sync_receive_fd(socket_fd
);
455 if (vfio_container_fd
< 0) {
456 RTE_LOG(ERR
, EAL
, " cannot get container fd!\n");
461 return vfio_container_fd
;
468 vfio_get_group_no(const char *sysfs_base
,
469 const char *dev_addr
, int *iommu_group_no
)
471 char linkname
[PATH_MAX
];
472 char filename
[PATH_MAX
];
473 char *tok
[16], *group_tok
, *end
;
476 memset(linkname
, 0, sizeof(linkname
));
477 memset(filename
, 0, sizeof(filename
));
479 /* try to find out IOMMU group for this device */
480 snprintf(linkname
, sizeof(linkname
),
481 "%s/%s/iommu_group", sysfs_base
, dev_addr
);
483 ret
= readlink(linkname
, filename
, sizeof(filename
));
485 /* if the link doesn't exist, no VFIO for us */
489 ret
= rte_strsplit(filename
, sizeof(filename
),
490 tok
, RTE_DIM(tok
), '/');
493 RTE_LOG(ERR
, EAL
, " %s cannot get IOMMU group\n", dev_addr
);
497 /* IOMMU group is always the last token */
499 group_tok
= tok
[ret
- 1];
501 *iommu_group_no
= strtol(group_tok
, &end
, 10);
502 if ((end
!= group_tok
&& *end
!= '\0') || errno
!= 0) {
503 RTE_LOG(ERR
, EAL
, " %s error parsing IOMMU number!\n", dev_addr
);
/*
 * DMA mapping callback of the Type 1 entry in iommu_types.
 *
 * vfio_container_fd: container descriptor the VFIO_IOMMU_MAP_DMA ioctl is
 *                    issued on.
 *
 * Walks the memory segment array returned by rte_eal_get_physmem_layout,
 * at most RTE_MAX_MEMSEG entries. For a segment a zeroed
 * vfio_iommu_type1_dma_map request is filled with the segment virtual
 * address (addr_64), its length and its physical address as IOVA, with
 * read and write access, and submitted to the container.
 *
 * NOTE(review): the statement executed when a segment address is NULL
 * (stop the walk or skip the entry) and the return statements are missing
 * from this listing -- confirm before relying on either.
 */
511 vfio_type1_dma_map(int vfio_container_fd
)
513 const struct rte_memseg
*ms
= rte_eal_get_physmem_layout();
516 /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
517 for (i
= 0; i
< RTE_MAX_MEMSEG
; i
++) {
518 struct vfio_iommu_type1_dma_map dma_map
;
520 if (ms
[i
].addr
== NULL
)
523 memset(&dma_map
, 0, sizeof(dma_map
));
524 dma_map
.argsz
= sizeof(struct vfio_iommu_type1_dma_map
);
525 dma_map
.vaddr
= ms
[i
].addr_64
;
526 dma_map
.size
= ms
[i
].len
;
527 dma_map
.iova
= ms
[i
].phys_addr
;
528 dma_map
.flags
= VFIO_DMA_MAP_FLAG_READ
| VFIO_DMA_MAP_FLAG_WRITE
;
530 ret
= ioctl(vfio_container_fd
, VFIO_IOMMU_MAP_DMA
, &dma_map
);
533 RTE_LOG(ERR
, EAL
, " cannot set up DMA remapping, "
534 "error %i (%s)\n", errno
, strerror(errno
));
/*
 * DMA mapping callback of the No-IOMMU entry in iommu_types.
 *
 * vfio_container_fd: marked __rte_unused; it exists only so the function
 *                    matches the callback signature used by the table.
 *
 * NOTE(review): the return statement is missing from this listing;
 * presumably it reports success, since nothing needs mapping -- confirm.
 */
543 vfio_noiommu_dma_map(int __rte_unused vfio_container_fd
)
545 /* No-IOMMU mode does not need DMA mapping */