1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation
11 #include <rte_bus_pci.h>
12 #include <rte_eal_memconfig.h>
13 #include <rte_malloc.h>
14 #include <rte_devargs.h>
15 #include <rte_memcpy.h>
18 #include "eal_filesystem.h"
25 * PCI probing under linux
27 * This code is used to simulate a PCI probe by parsing information in sysfs.
28 * When a registered device matches a driver, it is then initialized with
29 * the IGB_UIO driver (or left uninitialized if the device wasn't bound to it).
32 extern struct rte_pci_bus rte_pci_bus
;
35 pci_get_kernel_driver_by_path(const char *filename
, char *dri_name
,
42 if (!filename
|| !dri_name
)
45 count
= readlink(filename
, path
, PATH_MAX
);
46 if (count
>= PATH_MAX
)
49 /* For a device that does not have a driver */
55 name
= strrchr(path
, '/');
57 strlcpy(dri_name
, name
+ 1, len
);
66 rte_pci_map_device(struct rte_pci_device
*dev
)
70 /* try mapping the NIC resources using VFIO if it exists */
74 if (pci_vfio_is_enabled())
75 ret
= pci_vfio_map_resource(dev
);
78 case RTE_KDRV_IGB_UIO
:
79 case RTE_KDRV_UIO_GENERIC
:
80 if (rte_eal_using_phys_addrs()) {
81 /* map resources for devices that use uio */
82 ret
= pci_uio_map_resource(dev
);
87 " Not managed by a supported kernel driver, skipped\n");
95 /* Unmap pci device */
97 rte_pci_unmap_device(struct rte_pci_device
*dev
)
99 /* try unmapping the NIC resources using VFIO if it exists */
103 if (pci_vfio_is_enabled())
104 pci_vfio_unmap_resource(dev
);
107 case RTE_KDRV_IGB_UIO
:
108 case RTE_KDRV_UIO_GENERIC
:
109 /* unmap resources for devices that use uio */
110 pci_uio_unmap_resource(dev
);
114 " Not managed by a supported kernel driver, skipped\n");
120 find_max_end_va(const struct rte_memseg_list
*msl
, void *arg
)
122 size_t sz
= msl
->memseg_arr
.len
* msl
->page_sz
;
123 void *end_va
= RTE_PTR_ADD(msl
->base_va
, sz
);
126 if (*max_va
< end_va
)
132 pci_find_max_end_va(void)
136 rte_memseg_list_walk(find_max_end_va
, &va
);
141 /* parse one line of the "resource" sysfs file (note that the 'line'
142 * string is modified)
145 pci_parse_one_sysfs_resource(char *line
, size_t len
, uint64_t *phys_addr
,
146 uint64_t *end_addr
, uint64_t *flags
)
148 union pci_resource_info
{
154 char *ptrs
[PCI_RESOURCE_FMT_NVAL
];
157 if (rte_strsplit(line
, len
, res_info
.ptrs
, 3, ' ') != 3) {
159 "%s(): bad resource format\n", __func__
);
163 *phys_addr
= strtoull(res_info
.phys_addr
, NULL
, 16);
164 *end_addr
= strtoull(res_info
.end_addr
, NULL
, 16);
165 *flags
= strtoull(res_info
.flags
, NULL
, 16);
168 "%s(): bad resource format\n", __func__
);
175 /* parse the "resource" sysfs file */
177 pci_parse_sysfs_resource(const char *filename
, struct rte_pci_device
*dev
)
182 uint64_t phys_addr
, end_addr
, flags
;
184 f
= fopen(filename
, "r");
186 RTE_LOG(ERR
, EAL
, "Cannot open sysfs resource\n");
190 for (i
= 0; i
<PCI_MAX_RESOURCE
; i
++) {
192 if (fgets(buf
, sizeof(buf
), f
) == NULL
) {
194 "%s(): cannot read resource\n", __func__
);
197 if (pci_parse_one_sysfs_resource(buf
, sizeof(buf
), &phys_addr
,
198 &end_addr
, &flags
) < 0)
201 if (flags
& IORESOURCE_MEM
) {
202 dev
->mem_resource
[i
].phys_addr
= phys_addr
;
203 dev
->mem_resource
[i
].len
= end_addr
- phys_addr
+ 1;
204 /* not mapped for now */
205 dev
->mem_resource
[i
].addr
= NULL
;
216 /* Scan one pci sysfs entry, and fill the devices list from it. */
218 pci_scan_one(const char *dirname
, const struct rte_pci_addr
*addr
)
220 char filename
[PATH_MAX
];
222 struct rte_pci_device
*dev
;
223 char driver
[PATH_MAX
];
226 dev
= malloc(sizeof(*dev
));
230 memset(dev
, 0, sizeof(*dev
));
234 snprintf(filename
, sizeof(filename
), "%s/vendor", dirname
);
235 if (eal_parse_sysfs_value(filename
, &tmp
) < 0) {
239 dev
->id
.vendor_id
= (uint16_t)tmp
;
242 snprintf(filename
, sizeof(filename
), "%s/device", dirname
);
243 if (eal_parse_sysfs_value(filename
, &tmp
) < 0) {
247 dev
->id
.device_id
= (uint16_t)tmp
;
249 /* get subsystem_vendor id */
250 snprintf(filename
, sizeof(filename
), "%s/subsystem_vendor",
252 if (eal_parse_sysfs_value(filename
, &tmp
) < 0) {
256 dev
->id
.subsystem_vendor_id
= (uint16_t)tmp
;
258 /* get subsystem_device id */
259 snprintf(filename
, sizeof(filename
), "%s/subsystem_device",
261 if (eal_parse_sysfs_value(filename
, &tmp
) < 0) {
265 dev
->id
.subsystem_device_id
= (uint16_t)tmp
;
268 snprintf(filename
, sizeof(filename
), "%s/class",
270 if (eal_parse_sysfs_value(filename
, &tmp
) < 0) {
274 /* the least 24 bits are valid: class, subclass, program interface */
275 dev
->id
.class_id
= (uint32_t)tmp
& RTE_CLASS_ANY_ID
;
279 snprintf(filename
, sizeof(filename
), "%s/max_vfs", dirname
);
280 if (!access(filename
, F_OK
) &&
281 eal_parse_sysfs_value(filename
, &tmp
) == 0)
282 dev
->max_vfs
= (uint16_t)tmp
;
284 /* for non igb_uio driver, need kernel version >= 3.8 */
285 snprintf(filename
, sizeof(filename
),
286 "%s/sriov_numvfs", dirname
);
287 if (!access(filename
, F_OK
) &&
288 eal_parse_sysfs_value(filename
, &tmp
) == 0)
289 dev
->max_vfs
= (uint16_t)tmp
;
292 /* get numa node, default to 0 if not present */
293 snprintf(filename
, sizeof(filename
), "%s/numa_node",
296 if (access(filename
, F_OK
) != -1) {
297 if (eal_parse_sysfs_value(filename
, &tmp
) == 0)
298 dev
->device
.numa_node
= tmp
;
300 dev
->device
.numa_node
= -1;
302 dev
->device
.numa_node
= 0;
307 /* parse resources */
308 snprintf(filename
, sizeof(filename
), "%s/resource", dirname
);
309 if (pci_parse_sysfs_resource(filename
, dev
) < 0) {
310 RTE_LOG(ERR
, EAL
, "%s(): cannot parse resource\n", __func__
);
316 snprintf(filename
, sizeof(filename
), "%s/driver", dirname
);
317 ret
= pci_get_kernel_driver_by_path(filename
, driver
, sizeof(driver
));
319 RTE_LOG(ERR
, EAL
, "Fail to get kernel driver\n");
325 if (!strcmp(driver
, "vfio-pci"))
326 dev
->kdrv
= RTE_KDRV_VFIO
;
327 else if (!strcmp(driver
, "igb_uio"))
328 dev
->kdrv
= RTE_KDRV_IGB_UIO
;
329 else if (!strcmp(driver
, "uio_pci_generic"))
330 dev
->kdrv
= RTE_KDRV_UIO_GENERIC
;
332 dev
->kdrv
= RTE_KDRV_UNKNOWN
;
334 dev
->kdrv
= RTE_KDRV_NONE
;
336 /* device is valid, add in list (sorted) */
337 if (TAILQ_EMPTY(&rte_pci_bus
.device_list
)) {
338 rte_pci_add_device(dev
);
340 struct rte_pci_device
*dev2
;
343 TAILQ_FOREACH(dev2
, &rte_pci_bus
.device_list
, next
) {
344 ret
= rte_pci_addr_cmp(&dev
->addr
, &dev2
->addr
);
349 rte_pci_insert_device(dev2
, dev
);
350 } else { /* already registered */
351 if (dev2
->driver
== NULL
) {
352 dev2
->kdrv
= dev
->kdrv
;
353 dev2
->max_vfs
= dev
->max_vfs
;
355 memmove(dev2
->mem_resource
,
357 sizeof(dev
->mem_resource
));
360 * If device is plugged and driver is
361 * probed already, we don't need to do
362 * anything here. (This happens when we
363 * call rte_eal_hotplug_add)
365 if (dev2
->kdrv
!= dev
->kdrv
||
366 dev2
->max_vfs
!= dev
->max_vfs
)
368 * This should not happen.
369 * But it is still possible if
370 * we unbind a device from
371 * vfio or uio before hotplug
372 * remove and rebind it with
373 * a different configuration.
374 * So we just print out the
377 RTE_LOG(ERR
, EAL
, "Unexpected device scan at %s!\n",
385 rte_pci_add_device(dev
);
392 pci_update_device(const struct rte_pci_addr
*addr
)
394 char filename
[PATH_MAX
];
396 snprintf(filename
, sizeof(filename
), "%s/" PCI_PRI_FMT
,
397 rte_pci_get_sysfs_path(), addr
->domain
, addr
->bus
, addr
->devid
,
400 return pci_scan_one(filename
, addr
);
404 * split up a pci address into its constituent parts.
407 parse_pci_addr_format(const char *buf
, int bufsize
, struct rte_pci_addr
*addr
)
409 /* first split on ':' */
417 char *str
[PCI_FMT_NVAL
]; /* last element-separator is "." not ":" */
420 char *buf_copy
= strndup(buf
, bufsize
);
421 if (buf_copy
== NULL
)
424 if (rte_strsplit(buf_copy
, bufsize
, splitaddr
.str
, PCI_FMT_NVAL
, ':')
427 /* final split is on '.' between devid and function */
428 splitaddr
.function
= strchr(splitaddr
.devid
,'.');
429 if (splitaddr
.function
== NULL
)
431 *splitaddr
.function
++ = '\0';
433 /* now convert to int values */
435 addr
->domain
= strtoul(splitaddr
.domain
, NULL
, 16);
436 addr
->bus
= strtoul(splitaddr
.bus
, NULL
, 16);
437 addr
->devid
= strtoul(splitaddr
.devid
, NULL
, 16);
438 addr
->function
= strtoul(splitaddr
.function
, NULL
, 10);
442 free(buf_copy
); /* free the copy made with strndup */
450 * Scan the content of the PCI bus, and the devices in the devices
458 char dirname
[PATH_MAX
];
459 struct rte_pci_addr addr
;
461 /* for debug purposes, PCI can be disabled */
462 if (!rte_eal_has_pci())
466 if (!pci_vfio_is_enabled())
467 RTE_LOG(DEBUG
, EAL
, "VFIO PCI modules not loaded\n");
470 dir
= opendir(rte_pci_get_sysfs_path());
472 RTE_LOG(ERR
, EAL
, "%s(): opendir failed: %s\n",
473 __func__
, strerror(errno
));
477 while ((e
= readdir(dir
)) != NULL
) {
478 if (e
->d_name
[0] == '.')
481 if (parse_pci_addr_format(e
->d_name
, sizeof(e
->d_name
), &addr
) != 0)
484 snprintf(dirname
, sizeof(dirname
), "%s/%s",
485 rte_pci_get_sysfs_path(), e
->d_name
);
487 if (pci_scan_one(dirname
, &addr
) < 0)
499 * Is pci device bound to any kdrv
502 pci_one_device_is_bound(void)
504 struct rte_pci_device
*dev
= NULL
;
507 FOREACH_DEVICE_ON_PCIBUS(dev
) {
508 if (dev
->kdrv
== RTE_KDRV_UNKNOWN
||
509 dev
->kdrv
== RTE_KDRV_NONE
) {
520 * Any one of the devices bound to uio
523 pci_one_device_bound_uio(void)
525 struct rte_pci_device
*dev
= NULL
;
526 struct rte_devargs
*devargs
;
529 FOREACH_DEVICE_ON_PCIBUS(dev
) {
530 devargs
= dev
->device
.devargs
;
533 switch (rte_pci_bus
.bus
.conf
.scan_mode
) {
534 case RTE_BUS_SCAN_WHITELIST
:
535 if (devargs
&& devargs
->policy
== RTE_DEV_WHITELISTED
)
538 case RTE_BUS_SCAN_UNDEFINED
:
539 case RTE_BUS_SCAN_BLACKLIST
:
540 if (devargs
== NULL
||
541 devargs
->policy
!= RTE_DEV_BLACKLISTED
)
549 if (dev
->kdrv
== RTE_KDRV_IGB_UIO
||
550 dev
->kdrv
== RTE_KDRV_UIO_GENERIC
) {
558 * Any one of the devices has iova as va
561 pci_one_device_has_iova_va(void)
563 struct rte_pci_device
*dev
= NULL
;
564 struct rte_pci_driver
*drv
= NULL
;
566 FOREACH_DRIVER_ON_PCIBUS(drv
) {
567 if (drv
&& drv
->drv_flags
& RTE_PCI_DRV_IOVA_AS_VA
) {
568 FOREACH_DEVICE_ON_PCIBUS(dev
) {
569 if (dev
->kdrv
== RTE_KDRV_VFIO
&&
570 rte_pci_match(drv
, dev
))
578 #if defined(RTE_ARCH_X86)
580 pci_one_device_iommu_support_va(struct rte_pci_device
*dev
)
582 #define VTD_CAP_MGAW_SHIFT 16
583 #define VTD_CAP_MGAW_MASK (0x3fULL << VTD_CAP_MGAW_SHIFT)
584 #define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt */
585 struct rte_pci_addr
*addr
= &dev
->addr
;
586 char filename
[PATH_MAX
];
588 uint64_t mgaw
, vtd_cap_reg
= 0;
590 snprintf(filename
, sizeof(filename
),
591 "%s/" PCI_PRI_FMT
"/iommu/intel-iommu/cap",
592 rte_pci_get_sysfs_path(), addr
->domain
, addr
->bus
, addr
->devid
,
594 if (access(filename
, F_OK
) == -1) {
595 /* We don't have an Intel IOMMU, assume VA supported */
599 /* We have an Intel IOMMU */
600 fp
= fopen(filename
, "r");
602 RTE_LOG(ERR
, EAL
, "%s(): can't open %s\n", __func__
, filename
);
606 if (fscanf(fp
, "%" PRIx64
, &vtd_cap_reg
) != 1) {
607 RTE_LOG(ERR
, EAL
, "%s(): can't read %s\n", __func__
, filename
);
614 mgaw
= ((vtd_cap_reg
& VTD_CAP_MGAW_MASK
) >> VTD_CAP_MGAW_SHIFT
) + 1;
615 if (mgaw
< X86_VA_WIDTH
)
620 #elif defined(RTE_ARCH_PPC_64)
622 pci_one_device_iommu_support_va(__rte_unused
struct rte_pci_device
*dev
)
628 pci_one_device_iommu_support_va(__rte_unused
struct rte_pci_device
*dev
)
635 * All devices IOMMUs support VA as IOVA
638 pci_devices_iommu_support_va(void)
640 struct rte_pci_device
*dev
= NULL
;
641 struct rte_pci_driver
*drv
= NULL
;
643 FOREACH_DRIVER_ON_PCIBUS(drv
) {
644 FOREACH_DEVICE_ON_PCIBUS(dev
) {
645 if (!rte_pci_match(drv
, dev
))
647 if (!pci_one_device_iommu_support_va(dev
))
655 * Get iommu class of PCI devices on the bus.
658 rte_pci_get_iommu_class(void)
661 bool is_vfio_noiommu_enabled
= true;
666 is_bound
= pci_one_device_is_bound();
670 has_iova_va
= pci_one_device_has_iova_va();
671 is_bound_uio
= pci_one_device_bound_uio();
672 iommu_no_va
= !pci_devices_iommu_support_va();
674 is_vfio_noiommu_enabled
= rte_vfio_noiommu_is_enabled() == true ?
678 if (has_iova_va
&& !is_bound_uio
&& !is_vfio_noiommu_enabled
&&
683 RTE_LOG(WARNING
, EAL
, "Some devices want iova as va but pa will be used because.. ");
684 if (is_vfio_noiommu_enabled
)
685 RTE_LOG(WARNING
, EAL
, "vfio-noiommu mode configured\n");
687 RTE_LOG(WARNING
, EAL
, "few device bound to UIO\n");
689 RTE_LOG(WARNING
, EAL
, "IOMMU does not support IOVA as VA\n");
695 /* Read PCI config space. */
696 int rte_pci_read_config(const struct rte_pci_device
*device
,
697 void *buf
, size_t len
, off_t offset
)
699 const struct rte_intr_handle
*intr_handle
= &device
->intr_handle
;
701 switch (intr_handle
->type
) {
702 case RTE_INTR_HANDLE_UIO
:
703 case RTE_INTR_HANDLE_UIO_INTX
:
704 return pci_uio_read_config(intr_handle
, buf
, len
, offset
);
707 case RTE_INTR_HANDLE_VFIO_MSIX
:
708 case RTE_INTR_HANDLE_VFIO_MSI
:
709 case RTE_INTR_HANDLE_VFIO_LEGACY
:
710 return pci_vfio_read_config(intr_handle
, buf
, len
, offset
);
714 "Unknown handle type of fd %d\n",
720 /* Write PCI config space. */
721 int rte_pci_write_config(const struct rte_pci_device
*device
,
722 const void *buf
, size_t len
, off_t offset
)
724 const struct rte_intr_handle
*intr_handle
= &device
->intr_handle
;
726 switch (intr_handle
->type
) {
727 case RTE_INTR_HANDLE_UIO
:
728 case RTE_INTR_HANDLE_UIO_INTX
:
729 return pci_uio_write_config(intr_handle
, buf
, len
, offset
);
732 case RTE_INTR_HANDLE_VFIO_MSIX
:
733 case RTE_INTR_HANDLE_VFIO_MSI
:
734 case RTE_INTR_HANDLE_VFIO_LEGACY
:
735 return pci_vfio_write_config(intr_handle
, buf
, len
, offset
);
739 "Unknown handle type of fd %d\n",
745 #if defined(RTE_ARCH_X86)
747 pci_ioport_map(struct rte_pci_device
*dev
, int bar __rte_unused
,
748 struct rte_pci_ioport
*p
)
757 snprintf(pci_id
, sizeof(pci_id
), PCI_PRI_FMT
,
758 dev
->addr
.domain
, dev
->addr
.bus
,
759 dev
->addr
.devid
, dev
->addr
.function
);
761 fp
= fopen("/proc/ioports", "r");
763 RTE_LOG(ERR
, EAL
, "%s(): can't open ioports\n", __func__
);
767 while (getdelim(&line
, &linesz
, '\n', fp
) > 0) {
772 n
= strcspn(ptr
, ":");
776 while (*left
&& isspace(*left
))
779 if (!strncmp(left
, pci_id
, strlen(pci_id
))) {
782 while (*ptr
&& isspace(*ptr
))
785 sscanf(ptr
, "%04hx-%04hx", &start
, &end
);
798 RTE_LOG(DEBUG
, EAL
, "PCI Port IO found start=0x%x\n", start
);
805 rte_pci_ioport_map(struct rte_pci_device
*dev
, int bar
,
806 struct rte_pci_ioport
*p
)
813 if (pci_vfio_is_enabled())
814 ret
= pci_vfio_ioport_map(dev
, bar
, p
);
817 case RTE_KDRV_IGB_UIO
:
818 ret
= pci_uio_ioport_map(dev
, bar
, p
);
820 case RTE_KDRV_UIO_GENERIC
:
821 #if defined(RTE_ARCH_X86)
822 ret
= pci_ioport_map(dev
, bar
, p
);
824 ret
= pci_uio_ioport_map(dev
, bar
, p
);
828 #if defined(RTE_ARCH_X86)
829 ret
= pci_ioport_map(dev
, bar
, p
);
843 rte_pci_ioport_read(struct rte_pci_ioport
*p
,
844 void *data
, size_t len
, off_t offset
)
846 switch (p
->dev
->kdrv
) {
849 pci_vfio_ioport_read(p
, data
, len
, offset
);
852 case RTE_KDRV_IGB_UIO
:
853 pci_uio_ioport_read(p
, data
, len
, offset
);
855 case RTE_KDRV_UIO_GENERIC
:
856 pci_uio_ioport_read(p
, data
, len
, offset
);
859 #if defined(RTE_ARCH_X86)
860 pci_uio_ioport_read(p
, data
, len
, offset
);
869 rte_pci_ioport_write(struct rte_pci_ioport
*p
,
870 const void *data
, size_t len
, off_t offset
)
872 switch (p
->dev
->kdrv
) {
875 pci_vfio_ioport_write(p
, data
, len
, offset
);
878 case RTE_KDRV_IGB_UIO
:
879 pci_uio_ioport_write(p
, data
, len
, offset
);
881 case RTE_KDRV_UIO_GENERIC
:
882 pci_uio_ioport_write(p
, data
, len
, offset
);
885 #if defined(RTE_ARCH_X86)
886 pci_uio_ioport_write(p
, data
, len
, offset
);
895 rte_pci_ioport_unmap(struct rte_pci_ioport
*p
)
899 switch (p
->dev
->kdrv
) {
902 if (pci_vfio_is_enabled())
903 ret
= pci_vfio_ioport_unmap(p
);
906 case RTE_KDRV_IGB_UIO
:
907 ret
= pci_uio_ioport_unmap(p
);
909 case RTE_KDRV_UIO_GENERIC
:
910 #if defined(RTE_ARCH_X86)
913 ret
= pci_uio_ioport_unmap(p
);
917 #if defined(RTE_ARCH_X86)