]>
git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/dpdk/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation
18 #include <sys/queue.h>
21 #include <linux/mman.h> /* for hugetlb-related flags */
23 #include <rte_memory.h>
25 #include <rte_launch.h>
26 #include <rte_per_lcore.h>
27 #include <rte_lcore.h>
28 #include <rte_debug.h>
30 #include <rte_common.h>
31 #include "rte_string_fns.h"
32 #include "eal_internal_cfg.h"
33 #include "eal_hugepages.h"
34 #include "eal_filesystem.h"
36 static const char sys_dir_path
[] = "/sys/kernel/mm/hugepages";
37 static const char sys_pages_numa_dir_path
[] = "/sys/devices/system/node";
/*
 * Uses mmap to create a shared memory area for storage of data.
 * Used in this file to store the hugepage file map on disk.
 *
 * filename: path of the backing file, opened with `flags` (mode 0666 is
 *           applied if the flags cause the file to be created).
 * mem_size: the file is resized to this many bytes and mapped in full,
 *           readable and writable.
 * flags:    open(2) flags.
 *
 * Returns the address of the mapping, or NULL if opening, resizing or
 * mapping the file failed.
 */
static void *
map_shared_memory(const char *filename, const size_t mem_size, int flags)
{
	void *retval;
	int fd = open(filename, flags, 0666);

	if (fd < 0)
		return NULL;

	if (ftruncate(fd, mem_size) < 0) {
		close(fd);
		return NULL;
	}

	retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
			MAP_SHARED, fd, 0);

	/* the mapping keeps its own reference to the file */
	close(fd);

	/*
	 * mmap() reports failure with MAP_FAILED, not NULL, while the callers
	 * of this helper test the result against NULL.
	 */
	return retval == MAP_FAILED ? NULL : retval;
}
/*
 * Map an already existing shared file of mem_size bytes read-write.
 * The file is opened without O_CREAT, so it is not created when missing.
 * Returns whatever map_shared_memory() returns.
 */
static void *
open_shared_memory(const char *filename, const size_t mem_size)
{
	const int open_flags = O_RDWR;

	return map_shared_memory(filename, mem_size, open_flags);
}
/*
 * Map a shared file of mem_size bytes read-write, creating the file first
 * if it does not exist yet (O_CREAT).
 * Returns whatever map_shared_memory() returns.
 */
static void *
create_shared_memory(const char *filename, const size_t mem_size)
{
	const int open_flags = O_RDWR | O_CREAT;

	return map_shared_memory(filename, mem_size, open_flags);
}
72 /* this function is only called from eal_hugepage_info_init which itself
73 * is only called from a primary process */
75 get_num_hugepages(const char *subdir
)
78 long unsigned resv_pages
, num_pages
= 0;
79 const char *nr_hp_file
= "free_hugepages";
80 const char *nr_rsvd_file
= "resv_hugepages";
82 /* first, check how many reserved pages kernel reports */
83 snprintf(path
, sizeof(path
), "%s/%s/%s",
84 sys_dir_path
, subdir
, nr_rsvd_file
);
85 if (eal_parse_sysfs_value(path
, &resv_pages
) < 0)
88 snprintf(path
, sizeof(path
), "%s/%s/%s",
89 sys_dir_path
, subdir
, nr_hp_file
);
90 if (eal_parse_sysfs_value(path
, &num_pages
) < 0)
94 RTE_LOG(WARNING
, EAL
, "No free hugepages reported in %s\n",
97 /* adjust num_pages */
98 if (num_pages
>= resv_pages
)
99 num_pages
-= resv_pages
;
103 /* we want to return a uint32_t and more than this looks suspicious
105 if (num_pages
> UINT32_MAX
)
106 num_pages
= UINT32_MAX
;
112 get_num_hugepages_on_node(const char *subdir
, unsigned int socket
)
114 char path
[PATH_MAX
], socketpath
[PATH_MAX
];
116 unsigned long num_pages
= 0;
117 const char *nr_hp_file
= "free_hugepages";
119 snprintf(socketpath
, sizeof(socketpath
), "%s/node%u/hugepages",
120 sys_pages_numa_dir_path
, socket
);
122 socketdir
= opendir(socketpath
);
124 /* Keep calm and carry on */
127 /* Can't find socket dir, so ignore it */
131 snprintf(path
, sizeof(path
), "%s/%s/%s",
132 socketpath
, subdir
, nr_hp_file
);
133 if (eal_parse_sysfs_value(path
, &num_pages
) < 0)
137 RTE_LOG(WARNING
, EAL
, "No free hugepages reported in %s\n",
141 * we want to return a uint32_t and more than this looks suspicious
144 if (num_pages
> UINT32_MAX
)
145 num_pages
= UINT32_MAX
;
151 get_default_hp_size(void)
153 const char proc_meminfo
[] = "/proc/meminfo";
154 const char str_hugepagesz
[] = "Hugepagesize:";
155 unsigned hugepagesz_len
= sizeof(str_hugepagesz
) - 1;
157 unsigned long long size
= 0;
159 FILE *fd
= fopen(proc_meminfo
, "r");
161 rte_panic("Cannot open %s\n", proc_meminfo
);
162 while(fgets(buffer
, sizeof(buffer
), fd
)){
163 if (strncmp(buffer
, str_hugepagesz
, hugepagesz_len
) == 0){
164 size
= rte_str_to_size(&buffer
[hugepagesz_len
]);
170 rte_panic("Cannot get default hugepage size from %s\n", proc_meminfo
);
175 get_hugepage_dir(uint64_t hugepage_sz
, char *hugedir
, int len
)
177 enum proc_mount_fieldnames
{
184 static uint64_t default_size
= 0;
185 const char proc_mounts
[] = "/proc/mounts";
186 const char hugetlbfs_str
[] = "hugetlbfs";
187 const size_t htlbfs_str_len
= sizeof(hugetlbfs_str
) - 1;
188 const char pagesize_opt
[] = "pagesize=";
189 const size_t pagesize_opt_len
= sizeof(pagesize_opt
) - 1;
190 const char split_tok
= ' ';
191 char *splitstr
[_FIELDNAME_MAX
];
195 FILE *fd
= fopen(proc_mounts
, "r");
197 rte_panic("Cannot open %s\n", proc_mounts
);
199 if (default_size
== 0)
200 default_size
= get_default_hp_size();
202 while (fgets(buf
, sizeof(buf
), fd
)){
203 if (rte_strsplit(buf
, sizeof(buf
), splitstr
, _FIELDNAME_MAX
,
204 split_tok
) != _FIELDNAME_MAX
) {
205 RTE_LOG(ERR
, EAL
, "Error parsing %s\n", proc_mounts
);
206 break; /* return NULL */
209 /* we have a specified --huge-dir option, only examine that dir */
210 if (internal_config
.hugepage_dir
!= NULL
&&
211 strcmp(splitstr
[MOUNTPT
], internal_config
.hugepage_dir
) != 0)
214 if (strncmp(splitstr
[FSTYPE
], hugetlbfs_str
, htlbfs_str_len
) == 0){
215 const char *pagesz_str
= strstr(splitstr
[OPTIONS
], pagesize_opt
);
217 /* if no explicit page size, the default page size is compared */
218 if (pagesz_str
== NULL
){
219 if (hugepage_sz
== default_size
){
220 strlcpy(hugedir
, splitstr
[MOUNTPT
], len
);
225 /* there is an explicit page size, so check it */
227 uint64_t pagesz
= rte_str_to_size(&pagesz_str
[pagesize_opt_len
]);
228 if (pagesz
== hugepage_sz
) {
229 strlcpy(hugedir
, splitstr
[MOUNTPT
], len
);
234 } /* end if strncmp hugetlbfs */
235 } /* end while fgets */
242 * Clear the hugepage directory of whatever hugepage files
243 * there are. Checks if the file is locked (i.e.
244 * if it's in use by another DPDK process).
247 clear_hugedir(const char * hugedir
)
250 struct dirent
*dirent
;
251 int dir_fd
, fd
, lck_result
;
252 const char filter
[] = "*map_*"; /* matches hugepage files */
255 dir
= opendir(hugedir
);
257 RTE_LOG(ERR
, EAL
, "Unable to open hugepage directory %s\n",
263 dirent
= readdir(dir
);
265 RTE_LOG(ERR
, EAL
, "Unable to read hugepage directory %s\n",
270 while(dirent
!= NULL
){
271 /* skip files that don't match the hugepage pattern */
272 if (fnmatch(filter
, dirent
->d_name
, 0) > 0) {
273 dirent
= readdir(dir
);
277 /* try and lock the file */
278 fd
= openat(dir_fd
, dirent
->d_name
, O_RDONLY
);
280 /* skip to next file */
282 dirent
= readdir(dir
);
286 /* non-blocking lock */
287 lck_result
= flock(fd
, LOCK_EX
| LOCK_NB
);
289 /* if lock succeeds, remove the file */
290 if (lck_result
!= -1)
291 unlinkat(dir_fd
, dirent
->d_name
, 0);
293 dirent
= readdir(dir
);
303 RTE_LOG(ERR
, EAL
, "Error while clearing hugepage dir: %s\n",
310 compare_hpi(const void *a
, const void *b
)
312 const struct hugepage_info
*hpi_a
= a
;
313 const struct hugepage_info
*hpi_b
= b
;
315 return hpi_b
->hugepage_sz
- hpi_a
->hugepage_sz
;
319 calc_num_pages(struct hugepage_info
*hpi
, struct dirent
*dirent
)
321 uint64_t total_pages
= 0;
325 * first, try to put all hugepages into relevant sockets, but
326 * if first attempts fails, fall back to collecting all pages
327 * in one socket and sorting them later
330 /* we also don't want to do this for legacy init */
331 if (!internal_config
.legacy_mem
)
332 for (i
= 0; i
< rte_socket_count(); i
++) {
333 int socket
= rte_socket_id_by_idx(i
);
334 unsigned int num_pages
=
335 get_num_hugepages_on_node(
336 dirent
->d_name
, socket
);
337 hpi
->num_pages
[socket
] = num_pages
;
338 total_pages
+= num_pages
;
341 * we failed to sort memory from the get go, so fall
344 if (total_pages
== 0) {
345 hpi
->num_pages
[0] = get_num_hugepages(dirent
->d_name
);
348 /* for 32-bit systems, limit number of hugepages to
349 * 1GB per page size */
350 hpi
->num_pages
[0] = RTE_MIN(hpi
->num_pages
[0],
351 RTE_PGSIZE_1G
/ hpi
->hugepage_sz
);
357 hugepage_info_init(void)
358 { const char dirent_start_text
[] = "hugepages-";
359 const size_t dirent_start_len
= sizeof(dirent_start_text
) - 1;
360 unsigned int i
, num_sizes
= 0;
362 struct dirent
*dirent
;
364 dir
= opendir(sys_dir_path
);
367 "Cannot open directory %s to read system hugepage info\n",
372 for (dirent
= readdir(dir
); dirent
!= NULL
; dirent
= readdir(dir
)) {
373 struct hugepage_info
*hpi
;
375 if (strncmp(dirent
->d_name
, dirent_start_text
,
376 dirent_start_len
) != 0)
379 if (num_sizes
>= MAX_HUGEPAGE_SIZES
)
382 hpi
= &internal_config
.hugepage_info
[num_sizes
];
384 rte_str_to_size(&dirent
->d_name
[dirent_start_len
]);
386 /* first, check if we have a mountpoint */
387 if (get_hugepage_dir(hpi
->hugepage_sz
,
388 hpi
->hugedir
, sizeof(hpi
->hugedir
)) < 0) {
391 num_pages
= get_num_hugepages(dirent
->d_name
);
394 "%" PRIu32
" hugepages of size "
395 "%" PRIu64
" reserved, but no mounted "
396 "hugetlbfs found for that size\n",
397 num_pages
, hpi
->hugepage_sz
);
398 /* if we have kernel support for reserving hugepages
399 * through mmap, and we're in in-memory mode, treat this
400 * page size as valid. we cannot be in legacy mode at
401 * this point because we've checked this earlier in the
404 #ifdef MAP_HUGE_SHIFT
405 if (internal_config
.in_memory
) {
406 RTE_LOG(DEBUG
, EAL
, "In-memory mode enabled, "
407 "hugepages of size %" PRIu64
" bytes "
408 "will be allocated anonymously\n",
410 calc_num_pages(hpi
, dirent
);
417 /* try to obtain a writelock */
418 hpi
->lock_descriptor
= open(hpi
->hugedir
, O_RDONLY
);
420 /* if blocking lock failed */
421 if (flock(hpi
->lock_descriptor
, LOCK_EX
) == -1) {
423 "Failed to lock hugepage directory!\n");
426 /* clear out the hugepages dir from unused pages */
427 if (clear_hugedir(hpi
->hugedir
) == -1)
430 calc_num_pages(hpi
, dirent
);
436 /* something went wrong, and we broke from the for loop above */
440 internal_config
.num_hugepage_sizes
= num_sizes
;
442 /* sort the page directory entries by size, largest to smallest */
443 qsort(&internal_config
.hugepage_info
[0], num_sizes
,
444 sizeof(internal_config
.hugepage_info
[0]), compare_hpi
);
446 /* now we have all info, check we have at least one valid size */
447 for (i
= 0; i
< num_sizes
; i
++) {
448 /* pages may no longer all be on socket 0, so check all */
449 unsigned int j
, num_pages
= 0;
450 struct hugepage_info
*hpi
= &internal_config
.hugepage_info
[i
];
452 for (j
= 0; j
< RTE_MAX_NUMA_NODES
; j
++)
453 num_pages
+= hpi
->num_pages
[j
];
458 /* no valid hugepage mounts available, return error */
463 * when we initialize the hugepage info, everything goes
464 * to socket 0 by default. it will later get sorted by memory
465 * initialization procedure.
468 eal_hugepage_info_init(void)
470 struct hugepage_info
*hpi
, *tmp_hpi
;
473 if (hugepage_info_init() < 0)
476 /* for no shared files mode, we're done */
477 if (internal_config
.no_shconf
)
480 hpi
= &internal_config
.hugepage_info
[0];
482 tmp_hpi
= create_shared_memory(eal_hugepage_info_path(),
483 sizeof(internal_config
.hugepage_info
));
484 if (tmp_hpi
== NULL
) {
485 RTE_LOG(ERR
, EAL
, "Failed to create shared memory!\n");
489 memcpy(tmp_hpi
, hpi
, sizeof(internal_config
.hugepage_info
));
491 /* we've copied file descriptors along with everything else, but they
492 * will be invalid in secondary process, so overwrite them
494 for (i
= 0; i
< RTE_DIM(internal_config
.hugepage_info
); i
++) {
495 struct hugepage_info
*tmp
= &tmp_hpi
[i
];
496 tmp
->lock_descriptor
= -1;
499 if (munmap(tmp_hpi
, sizeof(internal_config
.hugepage_info
)) < 0) {
500 RTE_LOG(ERR
, EAL
, "Failed to unmap shared memory!\n");
506 int eal_hugepage_info_read(void)
508 struct hugepage_info
*hpi
= &internal_config
.hugepage_info
[0];
509 struct hugepage_info
*tmp_hpi
;
511 tmp_hpi
= open_shared_memory(eal_hugepage_info_path(),
512 sizeof(internal_config
.hugepage_info
));
513 if (tmp_hpi
== NULL
) {
514 RTE_LOG(ERR
, EAL
, "Failed to open shared memory!\n");
518 memcpy(hpi
, tmp_hpi
, sizeof(internal_config
.hugepage_info
));
520 if (munmap(tmp_hpi
, sizeof(internal_config
.hugepage_info
)) < 0) {
521 RTE_LOG(ERR
, EAL
, "Failed to unmap shared memory!\n");