ceph/src/dpdk/lib/librte_eal/linuxapp/eal/eal_vfio.c (ceph.git at git.proxmox.com, commit "add subtree-ish sources for 12.0.3")
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

#include <rte_log.h>
#include <rte_memory.h>
#include <rte_eal_memconfig.h>

#include "eal_filesystem.h"
#include "eal_vfio.h"
#include "eal_private.h"

#ifdef VFIO_PRESENT

/* per-process VFIO config */
static struct vfio_config vfio_cfg;

static int vfio_type1_dma_map(int);
static int vfio_noiommu_dma_map(int);

/* IOMMU types we support */
static const struct vfio_iommu_type iommu_types[] = {
        /* x86 IOMMU, otherwise known as type 1 */
        { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
        /* IOMMU-less mode */
        { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},
};

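/*
 * Get a file descriptor for the VFIO group that iommu_group_no belongs to.
 * Already-open groups are looked up in vfio_cfg first. In the primary process
 * the group device is opened directly (falling back to the no-IOMMU path),
 * while a secondary process requests the fd from the primary over the VFIO
 * mp-sync socket. Returns the fd, 0 if the group does not exist, or -1 on
 * error.
 */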
int
vfio_get_group_fd(int iommu_group_no)
{
        int i;
        int vfio_group_fd;
        char filename[PATH_MAX];

        /* check if we already have the group descriptor open */
        for (i = 0; i < vfio_cfg.vfio_group_idx; i++)
                if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no)
                        return vfio_cfg.vfio_groups[i].fd;

        /* if primary, try to open the group */
        if (internal_config.process_type == RTE_PROC_PRIMARY) {
                /* try regular group format */
                snprintf(filename, sizeof(filename),
                                VFIO_GROUP_FMT, iommu_group_no);
                vfio_group_fd = open(filename, O_RDWR);
                if (vfio_group_fd < 0) {
                        /* if file not found, it's not an error */
                        if (errno != ENOENT) {
                                RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
                                                strerror(errno));
                                return -1;
                        }

                        /* special case: try no-IOMMU path as well */
                        snprintf(filename, sizeof(filename),
                                        VFIO_NOIOMMU_GROUP_FMT, iommu_group_no);
                        vfio_group_fd = open(filename, O_RDWR);
                        if (vfio_group_fd < 0) {
                                if (errno != ENOENT) {
                                        RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
                                                        strerror(errno));
                                        return -1;
                                }
                                return 0;
                        }
                        /* noiommu group found */
                }

                /* if the fd is valid, create a new group for it */
                if (vfio_cfg.vfio_group_idx == VFIO_MAX_GROUPS) {
                        RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
                        close(vfio_group_fd);
                        return -1;
                }
                vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no;
                vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd;
                return vfio_group_fd;
        }
        /* if we're in a secondary process, request group fd from the primary
         * process via our socket
         */
        else {
                int socket_fd, ret;

                socket_fd = vfio_mp_sync_connect_to_primary();

                if (socket_fd < 0) {
                        RTE_LOG(ERR, EAL, " cannot connect to primary process!\n");
                        return -1;
                }
                if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) {
                        RTE_LOG(ERR, EAL, " cannot request container fd!\n");
                        close(socket_fd);
                        return -1;
                }
                if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) {
                        RTE_LOG(ERR, EAL, " cannot send group number!\n");
                        close(socket_fd);
                        return -1;
                }
                ret = vfio_mp_sync_receive_request(socket_fd);
                switch (ret) {
                case SOCKET_NO_FD:
                        close(socket_fd);
                        return 0;
                case SOCKET_OK:
                        vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd);
                        /* if we got the fd, return it */
                        if (vfio_group_fd > 0) {
                                close(socket_fd);
                                return vfio_group_fd;
                        }
                        /* fall-through on error */
                default:
                        RTE_LOG(ERR, EAL, " cannot get container fd!\n");
                        close(socket_fd);
                        return -1;
                }
        }
        return -1;
}

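/* reset the group slot that is currently being set up in vfio_cfg */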
static void
clear_current_group(void)
{
        vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = 0;
        vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = -1;
}

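/*
 * Set up a device for VFIO use: resolve its IOMMU group, make sure the group
 * is viable and attached to the container, set up DMA mappings once per
 * container (primary process only), then fetch the device fd and its
 * vfio_device_info. Returns 0 on success, 1 if the device is not managed by
 * VFIO, or -1 on error.
 */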
int vfio_setup_device(const char *sysfs_base, const char *dev_addr,
                int *vfio_dev_fd, struct vfio_device_info *device_info)
{
        struct vfio_group_status group_status = {
                        .argsz = sizeof(group_status)
        };
        int vfio_group_fd;
        int iommu_group_no;
        int ret;

        /* get group number */
        ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no);
        if (ret == 0) {
                RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
                        dev_addr);
                return 1;
        }

        /* if negative, something failed */
        if (ret < 0)
                return -1;

        /* get the actual group fd */
        vfio_group_fd = vfio_get_group_fd(iommu_group_no);
        if (vfio_group_fd < 0)
                return -1;

        /* store group fd */
        vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no;
        vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd;

        /* if group_fd == 0, that means the device isn't managed by VFIO */
        if (vfio_group_fd == 0) {
                RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
                                dev_addr);
                /* we store 0 as group fd to distinguish between existing but
                 * unbound VFIO groups, and groups that don't exist at all.
                 */
                vfio_cfg.vfio_group_idx++;
                return 1;
        }

        /*
         * at this point, we know that this group is viable (meaning, all devices
         * are either bound to VFIO or not bound to anything)
         */

        /* check if the group is viable */
        ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status);
        if (ret) {
                RTE_LOG(ERR, EAL, " %s cannot get group status, "
                                "error %i (%s)\n", dev_addr, errno, strerror(errno));
                close(vfio_group_fd);
                clear_current_group();
                return -1;
        } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
                RTE_LOG(ERR, EAL, " %s VFIO group is not viable!\n", dev_addr);
                close(vfio_group_fd);
                clear_current_group();
                return -1;
        }

        /* check if group does not have a container yet */
        if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) {

                /* add group to a container */
                ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER,
                                &vfio_cfg.vfio_container_fd);
                if (ret) {
                        RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, "
                                        "error %i (%s)\n", dev_addr, errno, strerror(errno));
                        close(vfio_group_fd);
                        clear_current_group();
                        return -1;
                }
                /*
                 * at this point we know that this group has been successfully
                 * initialized, so we increment vfio_group_idx to indicate that we can
                 * add new groups.
                 */
                vfio_cfg.vfio_group_idx++;
        }

        /*
         * pick an IOMMU type and set up DMA mappings for container
         *
         * needs to be done only once, only when at least one group is assigned to
         * a container and only in primary process
         */
        if (internal_config.process_type == RTE_PROC_PRIMARY &&
                        vfio_cfg.vfio_container_has_dma == 0) {
                /* select an IOMMU type which we will be using */
                const struct vfio_iommu_type *t =
                        vfio_set_iommu_type(vfio_cfg.vfio_container_fd);
                if (!t) {
                        RTE_LOG(ERR, EAL, " %s failed to select IOMMU type\n", dev_addr);
                        return -1;
                }
                ret = t->dma_map_func(vfio_cfg.vfio_container_fd);
                if (ret) {
                        RTE_LOG(ERR, EAL, " %s DMA remapping failed, "
                                        "error %i (%s)\n", dev_addr, errno, strerror(errno));
                        return -1;
                }
                vfio_cfg.vfio_container_has_dma = 1;
        }

        /* get a file descriptor for the device */
        *vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr);
        if (*vfio_dev_fd < 0) {
                /* if we cannot get a device fd, this simply means that this
                 * particular port is not bound to VFIO
                 */
                RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
                                dev_addr);
                return 1;
        }

        /* test and setup the device */
        ret = ioctl(*vfio_dev_fd, VFIO_DEVICE_GET_INFO, device_info);
        if (ret) {
                RTE_LOG(ERR, EAL, " %s cannot get device info, "
                                "error %i (%s)\n", dev_addr, errno, strerror(errno));
                close(*vfio_dev_fd);
                return -1;
        }

        return 0;
}

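/*
 * Probe for VFIO support: initialize the group list, check that the kernel
 * module given by modname is loaded and, if so, open the VFIO container.
 * Returns 0 whether or not VFIO ends up enabled; -1 only if the loaded
 * module list could not be read.
 */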
int
vfio_enable(const char *modname)
{
        /* initialize group list */
        int i;
        int vfio_available;

        for (i = 0; i < VFIO_MAX_GROUPS; i++) {
                vfio_cfg.vfio_groups[i].fd = -1;
                vfio_cfg.vfio_groups[i].group_no = -1;
        }

        /* inform the user that we are probing for VFIO */
        RTE_LOG(INFO, EAL, "Probing VFIO support...\n");

        /* check if vfio-pci module is loaded */
        vfio_available = rte_eal_check_module(modname);

        /* return error directly */
        if (vfio_available == -1) {
                RTE_LOG(INFO, EAL, "Could not get loaded module details!\n");
                return -1;
        }

        /* return 0 if VFIO modules not loaded */
        if (vfio_available == 0) {
                RTE_LOG(DEBUG, EAL, "VFIO modules not loaded, "
                        "skipping VFIO support...\n");
                return 0;
        }

        vfio_cfg.vfio_container_fd = vfio_get_container_fd();

        /* check if we have VFIO driver enabled */
        if (vfio_cfg.vfio_container_fd != -1) {
                RTE_LOG(NOTICE, EAL, "VFIO support initialized\n");
                vfio_cfg.vfio_enabled = 1;
        } else {
                RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n");
        }

        return 0;
}

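/* report whether VFIO is both initialized and backed by a loaded module */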
int
vfio_is_enabled(const char *modname)
{
        /* treat rte_eal_check_module() errors (-1) as "module not available" */
        const int mod_available = rte_eal_check_module(modname) > 0;
        return vfio_cfg.vfio_enabled && mod_available;
}

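/*
 * Try each IOMMU type from iommu_types[] against the container and return
 * the first one the kernel accepts, or NULL if none is supported.
 */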
const struct vfio_iommu_type *
vfio_set_iommu_type(int vfio_container_fd)
{
        unsigned idx;
        for (idx = 0; idx < RTE_DIM(iommu_types); idx++) {
                const struct vfio_iommu_type *t = &iommu_types[idx];

                int ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
                                t->type_id);
                if (!ret) {
                        RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n",
                                        t->type_id, t->name);
                        return t;
                }
                /* not an error, there may be more supported IOMMU types */
                RTE_LOG(DEBUG, EAL, " set IOMMU type %d (%s) failed, "
                                "error %i (%s)\n", t->type_id, t->name, errno,
                                strerror(errno));
        }
        /* if we didn't find a suitable IOMMU type, fail */
        return NULL;
}

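/*
 * Ask the container which of our IOMMU types it supports via
 * VFIO_CHECK_EXTENSION. Returns 0 if at least one type is supported,
 * -1 otherwise (closing the container fd on failure).
 */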
int
vfio_has_supported_extensions(int vfio_container_fd)
{
        int ret;
        unsigned idx, n_extensions = 0;
        for (idx = 0; idx < RTE_DIM(iommu_types); idx++) {
                const struct vfio_iommu_type *t = &iommu_types[idx];

                ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION,
                                t->type_id);
                if (ret < 0) {
                        RTE_LOG(ERR, EAL, " could not get IOMMU type, "
                                "error %i (%s)\n", errno,
                                strerror(errno));
                        close(vfio_container_fd);
                        return -1;
                } else if (ret == 1) {
                        /* we found a supported extension */
                        n_extensions++;
                }
                RTE_LOG(DEBUG, EAL, " IOMMU type %d (%s) is %s\n",
                                t->type_id, t->name,
                                ret ? "supported" : "not supported");
        }

        /* if we didn't find any supported IOMMU types, fail */
        if (!n_extensions) {
                close(vfio_container_fd);
                return -1;
        }

        return 0;
}

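/*
 * Open the VFIO container device (VFIO_CONTAINER_PATH) and validate the API
 * version and IOMMU extensions (primary process), or obtain the container fd
 * from the primary process over the mp-sync socket (secondary process).
 * Returns the container fd or -1 on error.
 */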
int
vfio_get_container_fd(void)
{
        int ret, vfio_container_fd;

        /* if we're in a primary process, try to open the container */
        if (internal_config.process_type == RTE_PROC_PRIMARY) {
                vfio_container_fd = open(VFIO_CONTAINER_PATH, O_RDWR);
                if (vfio_container_fd < 0) {
                        RTE_LOG(ERR, EAL, " cannot open VFIO container, "
                                        "error %i (%s)\n", errno, strerror(errno));
                        return -1;
                }

                /* check VFIO API version */
                ret = ioctl(vfio_container_fd, VFIO_GET_API_VERSION);
                if (ret != VFIO_API_VERSION) {
                        if (ret < 0)
                                RTE_LOG(ERR, EAL, " could not get VFIO API version, "
                                                "error %i (%s)\n", errno, strerror(errno));
                        else
                                RTE_LOG(ERR, EAL, " unsupported VFIO API version!\n");
                        close(vfio_container_fd);
                        return -1;
                }

                ret = vfio_has_supported_extensions(vfio_container_fd);
                if (ret) {
                        RTE_LOG(ERR, EAL, " no supported IOMMU "
                                        "extensions found!\n");
                        return -1;
                }

                return vfio_container_fd;
        } else {
                /*
                 * if we're in a secondary process, request container fd from the
                 * primary process via our socket
                 */
                int socket_fd;

                socket_fd = vfio_mp_sync_connect_to_primary();
                if (socket_fd < 0) {
                        RTE_LOG(ERR, EAL, " cannot connect to primary process!\n");
                        return -1;
                }
                if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_CONTAINER) < 0) {
                        RTE_LOG(ERR, EAL, " cannot request container fd!\n");
                        close(socket_fd);
                        return -1;
                }
                vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd);
                if (vfio_container_fd < 0) {
                        RTE_LOG(ERR, EAL, " cannot get container fd!\n");
                        close(socket_fd);
                        return -1;
                }
                close(socket_fd);
                return vfio_container_fd;
        }

        return -1;
}

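/*
 * Derive the IOMMU group number for a device from its sysfs iommu_group
 * symlink. Returns 1 on success, 0 if the device has no IOMMU group
 * (i.e. no VFIO for this device), or -1 on parse error.
 */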
int
vfio_get_group_no(const char *sysfs_base,
                const char *dev_addr, int *iommu_group_no)
{
        char linkname[PATH_MAX];
        char filename[PATH_MAX];
        char *tok[16], *group_tok, *end;
        int ret;

        memset(linkname, 0, sizeof(linkname));
        memset(filename, 0, sizeof(filename));

        /* try to find out IOMMU group for this device */
        snprintf(linkname, sizeof(linkname),
                        "%s/%s/iommu_group", sysfs_base, dev_addr);

        ret = readlink(linkname, filename, sizeof(filename));

        /* if the link doesn't exist, no VFIO for us */
        if (ret < 0)
                return 0;

        ret = rte_strsplit(filename, sizeof(filename),
                        tok, RTE_DIM(tok), '/');

        if (ret <= 0) {
                RTE_LOG(ERR, EAL, " %s cannot get IOMMU group\n", dev_addr);
                return -1;
        }

        /* IOMMU group is always the last token */
        errno = 0;
        group_tok = tok[ret - 1];
        end = group_tok;
        *iommu_group_no = strtol(group_tok, &end, 10);
        if ((end != group_tok && *end != '\0') || errno != 0) {
                RTE_LOG(ERR, EAL, " %s error parsing IOMMU number!\n", dev_addr);
                return -1;
        }

        return 1;
}

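/*
 * DMA mapping callback for type 1 (x86) IOMMUs: map every DPDK memory
 * segment into the container with a 1:1 physical-to-IOVA mapping.
 */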
static int
vfio_type1_dma_map(int vfio_container_fd)
{
        const struct rte_memseg *ms = rte_eal_get_physmem_layout();
        int i, ret;

        /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
        for (i = 0; i < RTE_MAX_MEMSEG; i++) {
                struct vfio_iommu_type1_dma_map dma_map;

                if (ms[i].addr == NULL)
                        break;

                memset(&dma_map, 0, sizeof(dma_map));
                dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
                dma_map.vaddr = ms[i].addr_64;
                dma_map.size = ms[i].len;
                dma_map.iova = ms[i].phys_addr;
                dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;

                ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);

                if (ret) {
                        RTE_LOG(ERR, EAL, " cannot set up DMA remapping, "
                                        "error %i (%s)\n", errno, strerror(errno));
                        return -1;
                }
        }

        return 0;
}

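/* DMA mapping callback for no-IOMMU mode: nothing to map */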
static int
vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
{
        /* No-IOMMU mode does not need DMA mapping */
        return 0;
}

#endif