4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 #include <rte_eal_memconfig.h>
40 #include <rte_malloc.h>
41 #include <rte_devargs.h>
42 #include <rte_memcpy.h>
44 #include "eal_filesystem.h"
45 #include "eal_private.h"
46 #include "eal_pci_init.h"
50 * PCI probing under linux
52 * This code is used to simulate a PCI probe by parsing information in sysfs.
53 * When a registered device matches a driver, it is then initialized with
54 * the IGB_UIO driver (or is not initialized, if the device wasn't bound to it).
57 /* unbind kernel driver for this device */
59 pci_unbind_kernel_driver(struct rte_pci_device
*dev
)
63 char filename
[PATH_MAX
];
65 struct rte_pci_addr
*loc
= &dev
->addr
;
67 /* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */
68 snprintf(filename
, sizeof(filename
),
69 "%s/" PCI_PRI_FMT
"/driver/unbind", pci_get_sysfs_path(),
70 loc
->domain
, loc
->bus
, loc
->devid
, loc
->function
);
72 f
= fopen(filename
, "w");
73 if (f
== NULL
) /* device was not bound */
76 n
= snprintf(buf
, sizeof(buf
), PCI_PRI_FMT
"\n",
77 loc
->domain
, loc
->bus
, loc
->devid
, loc
->function
);
78 if ((n
< 0) || (n
>= (int)sizeof(buf
))) {
79 RTE_LOG(ERR
, EAL
, "%s(): snprintf failed\n", __func__
);
82 if (fwrite(buf
, n
, 1, f
) == 0) {
83 RTE_LOG(ERR
, EAL
, "%s(): could not write to %s\n", __func__
,
97 pci_get_kernel_driver_by_path(const char *filename
, char *dri_name
)
103 if (!filename
|| !dri_name
)
106 count
= readlink(filename
, path
, PATH_MAX
);
107 if (count
>= PATH_MAX
)
110 /* The device does not have a driver */
116 name
= strrchr(path
, '/');
118 strncpy(dri_name
, name
+ 1, strlen(name
+ 1) + 1);
127 rte_eal_pci_map_device(struct rte_pci_device
*dev
)
131 /* try mapping the NIC resources using VFIO if it exists */
135 if (pci_vfio_is_enabled())
136 ret
= pci_vfio_map_resource(dev
);
139 case RTE_KDRV_IGB_UIO
:
140 case RTE_KDRV_UIO_GENERIC
:
141 /* map resources for devices that use uio */
142 ret
= pci_uio_map_resource(dev
);
146 " Not managed by a supported kernel driver, skipped\n");
154 /* Unmap pci device */
156 rte_eal_pci_unmap_device(struct rte_pci_device
*dev
)
158 /* try unmapping the NIC resources using VFIO if it exists */
161 RTE_LOG(ERR
, EAL
, "Hotplug doesn't support vfio yet\n");
163 case RTE_KDRV_IGB_UIO
:
164 case RTE_KDRV_UIO_GENERIC
:
165 /* unmap resources for devices that use uio */
166 pci_uio_unmap_resource(dev
);
170 " Not managed by a supported kernel driver, skipped\n");
176 pci_find_max_end_va(void)
178 const struct rte_memseg
*seg
= rte_eal_get_physmem_layout();
179 const struct rte_memseg
*last
= seg
;
182 for (i
= 0; i
< RTE_MAX_MEMSEG
; i
++, seg
++) {
183 if (seg
->addr
== NULL
)
186 if (seg
->addr
> last
->addr
)
190 return RTE_PTR_ADD(last
->addr
, last
->len
);
193 /* parse one line of the "resource" sysfs file (note that the 'line'
194 * string is modified)
197 pci_parse_one_sysfs_resource(char *line
, size_t len
, uint64_t *phys_addr
,
198 uint64_t *end_addr
, uint64_t *flags
)
200 union pci_resource_info
{
206 char *ptrs
[PCI_RESOURCE_FMT_NVAL
];
209 if (rte_strsplit(line
, len
, res_info
.ptrs
, 3, ' ') != 3) {
211 "%s(): bad resource format\n", __func__
);
215 *phys_addr
= strtoull(res_info
.phys_addr
, NULL
, 16);
216 *end_addr
= strtoull(res_info
.end_addr
, NULL
, 16);
217 *flags
= strtoull(res_info
.flags
, NULL
, 16);
220 "%s(): bad resource format\n", __func__
);
227 /* parse the "resource" sysfs file */
229 pci_parse_sysfs_resource(const char *filename
, struct rte_pci_device
*dev
)
234 uint64_t phys_addr
, end_addr
, flags
;
236 f
= fopen(filename
, "r");
238 RTE_LOG(ERR
, EAL
, "Cannot open sysfs resource\n");
242 for (i
= 0; i
<PCI_MAX_RESOURCE
; i
++) {
244 if (fgets(buf
, sizeof(buf
), f
) == NULL
) {
246 "%s(): cannot read resource\n", __func__
);
249 if (pci_parse_one_sysfs_resource(buf
, sizeof(buf
), &phys_addr
,
250 &end_addr
, &flags
) < 0)
253 if (flags
& IORESOURCE_MEM
) {
254 dev
->mem_resource
[i
].phys_addr
= phys_addr
;
255 dev
->mem_resource
[i
].len
= end_addr
- phys_addr
+ 1;
256 /* not mapped for now */
257 dev
->mem_resource
[i
].addr
= NULL
;
268 /* Scan one pci sysfs entry, and fill the devices list from it. */
270 pci_scan_one(const char *dirname
, uint16_t domain
, uint8_t bus
,
271 uint8_t devid
, uint8_t function
)
273 char filename
[PATH_MAX
];
275 struct rte_pci_device
*dev
;
276 char driver
[PATH_MAX
];
279 dev
= malloc(sizeof(*dev
));
283 memset(dev
, 0, sizeof(*dev
));
284 dev
->addr
.domain
= domain
;
286 dev
->addr
.devid
= devid
;
287 dev
->addr
.function
= function
;
290 snprintf(filename
, sizeof(filename
), "%s/vendor", dirname
);
291 if (eal_parse_sysfs_value(filename
, &tmp
) < 0) {
295 dev
->id
.vendor_id
= (uint16_t)tmp
;
298 snprintf(filename
, sizeof(filename
), "%s/device", dirname
);
299 if (eal_parse_sysfs_value(filename
, &tmp
) < 0) {
303 dev
->id
.device_id
= (uint16_t)tmp
;
305 /* get subsystem_vendor id */
306 snprintf(filename
, sizeof(filename
), "%s/subsystem_vendor",
308 if (eal_parse_sysfs_value(filename
, &tmp
) < 0) {
312 dev
->id
.subsystem_vendor_id
= (uint16_t)tmp
;
314 /* get subsystem_device id */
315 snprintf(filename
, sizeof(filename
), "%s/subsystem_device",
317 if (eal_parse_sysfs_value(filename
, &tmp
) < 0) {
321 dev
->id
.subsystem_device_id
= (uint16_t)tmp
;
324 snprintf(filename
, sizeof(filename
), "%s/class",
326 if (eal_parse_sysfs_value(filename
, &tmp
) < 0) {
330 /* the least significant 24 bits are valid: class, subclass, program interface */
331 dev
->id
.class_id
= (uint32_t)tmp
& RTE_CLASS_ANY_ID
;
335 snprintf(filename
, sizeof(filename
), "%s/max_vfs", dirname
);
336 if (!access(filename
, F_OK
) &&
337 eal_parse_sysfs_value(filename
, &tmp
) == 0)
338 dev
->max_vfs
= (uint16_t)tmp
;
340 /* for non igb_uio driver, need kernel version >= 3.8 */
341 snprintf(filename
, sizeof(filename
),
342 "%s/sriov_numvfs", dirname
);
343 if (!access(filename
, F_OK
) &&
344 eal_parse_sysfs_value(filename
, &tmp
) == 0)
345 dev
->max_vfs
= (uint16_t)tmp
;
349 snprintf(filename
, sizeof(filename
), "%s/numa_node",
351 if (access(filename
, R_OK
) != 0) {
352 /* if no NUMA support, set default to 0 */
353 dev
->device
.numa_node
= 0;
355 if (eal_parse_sysfs_value(filename
, &tmp
) < 0) {
359 dev
->device
.numa_node
= tmp
;
362 /* parse resources */
363 snprintf(filename
, sizeof(filename
), "%s/resource", dirname
);
364 if (pci_parse_sysfs_resource(filename
, dev
) < 0) {
365 RTE_LOG(ERR
, EAL
, "%s(): cannot parse resource\n", __func__
);
371 snprintf(filename
, sizeof(filename
), "%s/driver", dirname
);
372 ret
= pci_get_kernel_driver_by_path(filename
, driver
);
374 RTE_LOG(ERR
, EAL
, "Fail to get kernel driver\n");
380 if (!strcmp(driver
, "vfio-pci"))
381 dev
->kdrv
= RTE_KDRV_VFIO
;
382 else if (!strcmp(driver
, "igb_uio"))
383 dev
->kdrv
= RTE_KDRV_IGB_UIO
;
384 else if (!strcmp(driver
, "uio_pci_generic"))
385 dev
->kdrv
= RTE_KDRV_UIO_GENERIC
;
387 dev
->kdrv
= RTE_KDRV_UNKNOWN
;
389 dev
->kdrv
= RTE_KDRV_NONE
;
391 /* device is valid, add in list (sorted) */
392 if (TAILQ_EMPTY(&pci_device_list
)) {
393 rte_eal_device_insert(&dev
->device
);
394 TAILQ_INSERT_TAIL(&pci_device_list
, dev
, next
);
396 struct rte_pci_device
*dev2
;
399 TAILQ_FOREACH(dev2
, &pci_device_list
, next
) {
400 ret
= rte_eal_compare_pci_addr(&dev
->addr
, &dev2
->addr
);
405 TAILQ_INSERT_BEFORE(dev2
, dev
, next
);
406 rte_eal_device_insert(&dev
->device
);
407 } else { /* already registered */
408 dev2
->kdrv
= dev
->kdrv
;
409 dev2
->max_vfs
= dev
->max_vfs
;
410 memmove(dev2
->mem_resource
, dev
->mem_resource
,
411 sizeof(dev
->mem_resource
));
416 rte_eal_device_insert(&dev
->device
);
417 TAILQ_INSERT_TAIL(&pci_device_list
, dev
, next
);
424 pci_update_device(const struct rte_pci_addr
*addr
)
426 char filename
[PATH_MAX
];
428 snprintf(filename
, sizeof(filename
), "%s/" PCI_PRI_FMT
,
429 pci_get_sysfs_path(), addr
->domain
, addr
->bus
, addr
->devid
,
432 return pci_scan_one(filename
, addr
->domain
, addr
->bus
, addr
->devid
,
437 * split up a pci address into its constituent parts.
440 parse_pci_addr_format(const char *buf
, int bufsize
, uint16_t *domain
,
441 uint8_t *bus
, uint8_t *devid
, uint8_t *function
)
443 /* first split on ':' */
451 char *str
[PCI_FMT_NVAL
]; /* last element-separator is "." not ":" */
454 char *buf_copy
= strndup(buf
, bufsize
);
455 if (buf_copy
== NULL
)
458 if (rte_strsplit(buf_copy
, bufsize
, splitaddr
.str
, PCI_FMT_NVAL
, ':')
461 /* final split is on '.' between devid and function */
462 splitaddr
.function
= strchr(splitaddr
.devid
,'.');
463 if (splitaddr
.function
== NULL
)
465 *splitaddr
.function
++ = '\0';
467 /* now convert to int values */
469 *domain
= (uint16_t)strtoul(splitaddr
.domain
, NULL
, 16);
470 *bus
= (uint8_t)strtoul(splitaddr
.bus
, NULL
, 16);
471 *devid
= (uint8_t)strtoul(splitaddr
.devid
, NULL
, 16);
472 *function
= (uint8_t)strtoul(splitaddr
.function
, NULL
, 10);
476 free(buf_copy
); /* free the copy made with strndup */
484 * Scan the content of the PCI bus, and fill the devices list from the
484 * entries found in sysfs.
488 rte_eal_pci_scan(void)
492 char dirname
[PATH_MAX
];
494 uint8_t bus
, devid
, function
;
496 dir
= opendir(pci_get_sysfs_path());
498 RTE_LOG(ERR
, EAL
, "%s(): opendir failed: %s\n",
499 __func__
, strerror(errno
));
503 while ((e
= readdir(dir
)) != NULL
) {
504 if (e
->d_name
[0] == '.')
507 if (parse_pci_addr_format(e
->d_name
, sizeof(e
->d_name
), &domain
,
508 &bus
, &devid
, &function
) != 0)
511 snprintf(dirname
, sizeof(dirname
), "%s/%s",
512 pci_get_sysfs_path(), e
->d_name
);
513 if (pci_scan_one(dirname
, domain
, bus
, devid
, function
) < 0)
524 /* Read PCI config space. */
525 int rte_eal_pci_read_config(const struct rte_pci_device
*device
,
526 void *buf
, size_t len
, off_t offset
)
528 const struct rte_intr_handle
*intr_handle
= &device
->intr_handle
;
530 switch (intr_handle
->type
) {
531 case RTE_INTR_HANDLE_UIO
:
532 case RTE_INTR_HANDLE_UIO_INTX
:
533 return pci_uio_read_config(intr_handle
, buf
, len
, offset
);
536 case RTE_INTR_HANDLE_VFIO_MSIX
:
537 case RTE_INTR_HANDLE_VFIO_MSI
:
538 case RTE_INTR_HANDLE_VFIO_LEGACY
:
539 return pci_vfio_read_config(intr_handle
, buf
, len
, offset
);
543 "Unknown handle type of fd %d\n",
549 /* Write PCI config space. */
550 int rte_eal_pci_write_config(const struct rte_pci_device
*device
,
551 const void *buf
, size_t len
, off_t offset
)
553 const struct rte_intr_handle
*intr_handle
= &device
->intr_handle
;
555 switch (intr_handle
->type
) {
556 case RTE_INTR_HANDLE_UIO
:
557 case RTE_INTR_HANDLE_UIO_INTX
:
558 return pci_uio_write_config(intr_handle
, buf
, len
, offset
);
561 case RTE_INTR_HANDLE_VFIO_MSIX
:
562 case RTE_INTR_HANDLE_VFIO_MSI
:
563 case RTE_INTR_HANDLE_VFIO_LEGACY
:
564 return pci_vfio_write_config(intr_handle
, buf
, len
, offset
);
568 "Unknown handle type of fd %d\n",
574 #if defined(RTE_ARCH_X86)
576 pci_ioport_map(struct rte_pci_device
*dev
, int bar __rte_unused
,
577 struct rte_pci_ioport
*p
)
586 snprintf(pci_id
, sizeof(pci_id
), PCI_PRI_FMT
,
587 dev
->addr
.domain
, dev
->addr
.bus
,
588 dev
->addr
.devid
, dev
->addr
.function
);
590 fp
= fopen("/proc/ioports", "r");
592 RTE_LOG(ERR
, EAL
, "%s(): can't open ioports\n", __func__
);
596 while (getdelim(&line
, &linesz
, '\n', fp
) > 0) {
601 n
= strcspn(ptr
, ":");
605 while (*left
&& isspace(*left
))
608 if (!strncmp(left
, pci_id
, strlen(pci_id
))) {
611 while (*ptr
&& isspace(*ptr
))
614 sscanf(ptr
, "%04hx-%04hx", &start
, &end
);
626 dev
->intr_handle
.type
= RTE_INTR_HANDLE_UNKNOWN
;
628 RTE_LOG(DEBUG
, EAL
, "PCI Port IO found start=0x%x\n", start
);
635 rte_eal_pci_ioport_map(struct rte_pci_device
*dev
, int bar
,
636 struct rte_pci_ioport
*p
)
643 if (pci_vfio_is_enabled())
644 ret
= pci_vfio_ioport_map(dev
, bar
, p
);
647 case RTE_KDRV_IGB_UIO
:
648 ret
= pci_uio_ioport_map(dev
, bar
, p
);
650 case RTE_KDRV_UIO_GENERIC
:
651 #if defined(RTE_ARCH_X86)
652 ret
= pci_ioport_map(dev
, bar
, p
);
654 ret
= pci_uio_ioport_map(dev
, bar
, p
);
658 #if defined(RTE_ARCH_X86)
659 ret
= pci_ioport_map(dev
, bar
, p
);
673 rte_eal_pci_ioport_read(struct rte_pci_ioport
*p
,
674 void *data
, size_t len
, off_t offset
)
676 switch (p
->dev
->kdrv
) {
679 pci_vfio_ioport_read(p
, data
, len
, offset
);
682 case RTE_KDRV_IGB_UIO
:
683 pci_uio_ioport_read(p
, data
, len
, offset
);
685 case RTE_KDRV_UIO_GENERIC
:
686 pci_uio_ioport_read(p
, data
, len
, offset
);
689 #if defined(RTE_ARCH_X86)
690 pci_uio_ioport_read(p
, data
, len
, offset
);
699 rte_eal_pci_ioport_write(struct rte_pci_ioport
*p
,
700 const void *data
, size_t len
, off_t offset
)
702 switch (p
->dev
->kdrv
) {
705 pci_vfio_ioport_write(p
, data
, len
, offset
);
708 case RTE_KDRV_IGB_UIO
:
709 pci_uio_ioport_write(p
, data
, len
, offset
);
711 case RTE_KDRV_UIO_GENERIC
:
712 pci_uio_ioport_write(p
, data
, len
, offset
);
715 #if defined(RTE_ARCH_X86)
716 pci_uio_ioport_write(p
, data
, len
, offset
);
725 rte_eal_pci_ioport_unmap(struct rte_pci_ioport
*p
)
729 switch (p
->dev
->kdrv
) {
732 if (pci_vfio_is_enabled())
733 ret
= pci_vfio_ioport_unmap(p
);
736 case RTE_KDRV_IGB_UIO
:
737 ret
= pci_uio_ioport_unmap(p
);
739 case RTE_KDRV_UIO_GENERIC
:
740 #if defined(RTE_ARCH_X86)
743 ret
= pci_uio_ioport_unmap(p
);
747 #if defined(RTE_ARCH_X86)
758 /* Init the PCI EAL subsystem */
760 rte_eal_pci_init(void)
762 /* for debug purposes, PCI can be disabled */
763 if (internal_config
.no_pci
)
766 if (rte_eal_pci_scan() < 0) {
767 RTE_LOG(ERR
, EAL
, "%s(): Cannot scan PCI bus\n", __func__
);