]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*- |
2 | * BSD LICENSE | |
3 | * | |
4 | * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. | |
5 | * All rights reserved. | |
6 | * | |
7 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions | |
9 | * are met: | |
10 | * | |
11 | * * Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * * Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in | |
15 | * the documentation and/or other materials provided with the | |
16 | * distribution. | |
17 | * * Neither the name of Intel Corporation nor the names of its | |
18 | * contributors may be used to endorse or promote products derived | |
19 | * from this software without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
32 | */ | |
33 | ||
34 | #include <string.h> | |
35 | #include <sys/types.h> | |
36 | #include <sys/file.h> | |
37 | #include <dirent.h> | |
38 | #include <stdint.h> | |
39 | #include <stdlib.h> | |
40 | #include <stdio.h> | |
41 | #include <fnmatch.h> | |
42 | #include <inttypes.h> | |
43 | #include <stdarg.h> | |
44 | #include <unistd.h> | |
45 | #include <errno.h> | |
46 | #include <sys/queue.h> | |
47 | ||
48 | #include <rte_memory.h> | |
49 | #include <rte_memzone.h> | |
50 | #include <rte_eal.h> | |
51 | #include <rte_launch.h> | |
52 | #include <rte_per_lcore.h> | |
53 | #include <rte_lcore.h> | |
54 | #include <rte_debug.h> | |
55 | #include <rte_log.h> | |
56 | #include <rte_common.h> | |
57 | #include "rte_string_fns.h" | |
58 | #include "eal_internal_cfg.h" | |
59 | #include "eal_hugepages.h" | |
60 | #include "eal_filesystem.h" | |
61 | ||
62 | static const char sys_dir_path[] = "/sys/kernel/mm/hugepages"; | |
63 | ||
64 | /* this function is only called from eal_hugepage_info_init which itself | |
65 | * is only called from a primary process */ | |
66 | static uint32_t | |
67 | get_num_hugepages(const char *subdir) | |
68 | { | |
69 | char path[PATH_MAX]; | |
70 | long unsigned resv_pages, num_pages = 0; | |
71 | const char *nr_hp_file = "free_hugepages"; | |
72 | const char *nr_rsvd_file = "resv_hugepages"; | |
73 | ||
74 | /* first, check how many reserved pages kernel reports */ | |
75 | snprintf(path, sizeof(path), "%s/%s/%s", | |
76 | sys_dir_path, subdir, nr_rsvd_file); | |
77 | if (eal_parse_sysfs_value(path, &resv_pages) < 0) | |
78 | return 0; | |
79 | ||
80 | snprintf(path, sizeof(path), "%s/%s/%s", | |
81 | sys_dir_path, subdir, nr_hp_file); | |
82 | if (eal_parse_sysfs_value(path, &num_pages) < 0) | |
83 | return 0; | |
84 | ||
85 | if (num_pages == 0) | |
86 | RTE_LOG(WARNING, EAL, "No free hugepages reported in %s\n", | |
87 | subdir); | |
88 | ||
89 | /* adjust num_pages */ | |
90 | if (num_pages >= resv_pages) | |
91 | num_pages -= resv_pages; | |
92 | else if (resv_pages) | |
93 | num_pages = 0; | |
94 | ||
95 | /* we want to return a uint32_t and more than this looks suspicious | |
96 | * anyway ... */ | |
97 | if (num_pages > UINT32_MAX) | |
98 | num_pages = UINT32_MAX; | |
99 | ||
100 | return num_pages; | |
101 | } | |
102 | ||
/*
 * Find the "Hugepagesize:" line in /proc/meminfo and return the value that
 * follows it, as converted by rte_str_to_size().
 *
 * Calls rte_panic() if the file cannot be opened, or if no such line
 * yielding a non-zero size is found.
 */
static uint64_t
get_default_hp_size(void)
{
	const char meminfo_path[] = "/proc/meminfo";
	const char key[] = "Hugepagesize:";
	const unsigned key_len = sizeof(key) - 1;
	char line[256];
	unsigned long long hp_size = 0;
	FILE *meminfo;

	meminfo = fopen(meminfo_path, "r");
	if (meminfo == NULL)
		rte_panic("Cannot open %s\n", meminfo_path);

	while (fgets(line, sizeof(line), meminfo) != NULL) {
		if (strncmp(line, key, key_len) != 0)
			continue;

		/* everything after the key is handed to the size parser */
		hp_size = rte_str_to_size(line + key_len);
		break;
	}
	fclose(meminfo);

	if (hp_size == 0)
		rte_panic("Cannot get default hugepage size from %s\n", meminfo_path);

	return hp_size;
}
126 | ||
127 | static const char * | |
128 | get_hugepage_dir(uint64_t hugepage_sz) | |
129 | { | |
130 | enum proc_mount_fieldnames { | |
131 | DEVICE = 0, | |
132 | MOUNTPT, | |
133 | FSTYPE, | |
134 | OPTIONS, | |
135 | _FIELDNAME_MAX | |
136 | }; | |
137 | static uint64_t default_size = 0; | |
138 | const char proc_mounts[] = "/proc/mounts"; | |
139 | const char hugetlbfs_str[] = "hugetlbfs"; | |
140 | const size_t htlbfs_str_len = sizeof(hugetlbfs_str) - 1; | |
141 | const char pagesize_opt[] = "pagesize="; | |
142 | const size_t pagesize_opt_len = sizeof(pagesize_opt) - 1; | |
143 | const char split_tok = ' '; | |
144 | char *splitstr[_FIELDNAME_MAX]; | |
145 | char buf[BUFSIZ]; | |
146 | char *retval = NULL; | |
147 | ||
148 | FILE *fd = fopen(proc_mounts, "r"); | |
149 | if (fd == NULL) | |
150 | rte_panic("Cannot open %s\n", proc_mounts); | |
151 | ||
152 | if (default_size == 0) | |
153 | default_size = get_default_hp_size(); | |
154 | ||
155 | while (fgets(buf, sizeof(buf), fd)){ | |
156 | if (rte_strsplit(buf, sizeof(buf), splitstr, _FIELDNAME_MAX, | |
157 | split_tok) != _FIELDNAME_MAX) { | |
158 | RTE_LOG(ERR, EAL, "Error parsing %s\n", proc_mounts); | |
159 | break; /* return NULL */ | |
160 | } | |
161 | ||
162 | /* we have a specified --huge-dir option, only examine that dir */ | |
163 | if (internal_config.hugepage_dir != NULL && | |
164 | strcmp(splitstr[MOUNTPT], internal_config.hugepage_dir) != 0) | |
165 | continue; | |
166 | ||
167 | if (strncmp(splitstr[FSTYPE], hugetlbfs_str, htlbfs_str_len) == 0){ | |
168 | const char *pagesz_str = strstr(splitstr[OPTIONS], pagesize_opt); | |
169 | ||
170 | /* if no explicit page size, the default page size is compared */ | |
171 | if (pagesz_str == NULL){ | |
172 | if (hugepage_sz == default_size){ | |
173 | retval = strdup(splitstr[MOUNTPT]); | |
174 | break; | |
175 | } | |
176 | } | |
177 | /* there is an explicit page size, so check it */ | |
178 | else { | |
179 | uint64_t pagesz = rte_str_to_size(&pagesz_str[pagesize_opt_len]); | |
180 | if (pagesz == hugepage_sz) { | |
181 | retval = strdup(splitstr[MOUNTPT]); | |
182 | break; | |
183 | } | |
184 | } | |
185 | } /* end if strncmp hugetlbfs */ | |
186 | } /* end while fgets */ | |
187 | ||
188 | fclose(fd); | |
189 | return retval; | |
190 | } | |
191 | ||
192 | /* | |
193 | * Clear the hugepage directory of whatever hugepage files | |
194 | * there are. Checks if the file is locked (i.e. | |
195 | * if it's in use by another DPDK process). | |
196 | */ | |
197 | static int | |
198 | clear_hugedir(const char * hugedir) | |
199 | { | |
200 | DIR *dir; | |
201 | struct dirent *dirent; | |
202 | int dir_fd, fd, lck_result; | |
203 | const char filter[] = "*map_*"; /* matches hugepage files */ | |
204 | ||
205 | /* open directory */ | |
206 | dir = opendir(hugedir); | |
207 | if (!dir) { | |
208 | RTE_LOG(ERR, EAL, "Unable to open hugepage directory %s\n", | |
209 | hugedir); | |
210 | goto error; | |
211 | } | |
212 | dir_fd = dirfd(dir); | |
213 | ||
214 | dirent = readdir(dir); | |
215 | if (!dirent) { | |
216 | RTE_LOG(ERR, EAL, "Unable to read hugepage directory %s\n", | |
217 | hugedir); | |
218 | goto error; | |
219 | } | |
220 | ||
221 | while(dirent != NULL){ | |
222 | /* skip files that don't match the hugepage pattern */ | |
223 | if (fnmatch(filter, dirent->d_name, 0) > 0) { | |
224 | dirent = readdir(dir); | |
225 | continue; | |
226 | } | |
227 | ||
228 | /* try and lock the file */ | |
229 | fd = openat(dir_fd, dirent->d_name, O_RDONLY); | |
230 | ||
231 | /* skip to next file */ | |
232 | if (fd == -1) { | |
233 | dirent = readdir(dir); | |
234 | continue; | |
235 | } | |
236 | ||
237 | /* non-blocking lock */ | |
238 | lck_result = flock(fd, LOCK_EX | LOCK_NB); | |
239 | ||
240 | /* if lock succeeds, unlock and remove the file */ | |
241 | if (lck_result != -1) { | |
242 | flock(fd, LOCK_UN); | |
243 | unlinkat(dir_fd, dirent->d_name, 0); | |
244 | } | |
245 | close (fd); | |
246 | dirent = readdir(dir); | |
247 | } | |
248 | ||
249 | closedir(dir); | |
250 | return 0; | |
251 | ||
252 | error: | |
253 | if (dir) | |
254 | closedir(dir); | |
255 | ||
256 | RTE_LOG(ERR, EAL, "Error while clearing hugepage dir: %s\n", | |
257 | strerror(errno)); | |
258 | ||
259 | return -1; | |
260 | } | |
261 | ||
262 | static int | |
263 | compare_hpi(const void *a, const void *b) | |
264 | { | |
265 | const struct hugepage_info *hpi_a = a; | |
266 | const struct hugepage_info *hpi_b = b; | |
267 | ||
268 | return hpi_b->hugepage_sz - hpi_a->hugepage_sz; | |
269 | } | |
270 | ||
271 | /* | |
272 | * when we initialize the hugepage info, everything goes | |
273 | * to socket 0 by default. it will later get sorted by memory | |
274 | * initialization procedure. | |
275 | */ | |
276 | int | |
277 | eal_hugepage_info_init(void) | |
278 | { | |
279 | const char dirent_start_text[] = "hugepages-"; | |
280 | const size_t dirent_start_len = sizeof(dirent_start_text) - 1; | |
281 | unsigned i, num_sizes = 0; | |
282 | DIR *dir; | |
283 | struct dirent *dirent; | |
284 | ||
285 | dir = opendir(sys_dir_path); | |
286 | if (dir == NULL) | |
287 | rte_panic("Cannot open directory %s to read system hugepage " | |
288 | "info\n", sys_dir_path); | |
289 | ||
290 | for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) { | |
291 | struct hugepage_info *hpi; | |
292 | ||
293 | if (strncmp(dirent->d_name, dirent_start_text, | |
294 | dirent_start_len) != 0) | |
295 | continue; | |
296 | ||
297 | if (num_sizes >= MAX_HUGEPAGE_SIZES) | |
298 | break; | |
299 | ||
300 | hpi = &internal_config.hugepage_info[num_sizes]; | |
301 | hpi->hugepage_sz = | |
302 | rte_str_to_size(&dirent->d_name[dirent_start_len]); | |
303 | hpi->hugedir = get_hugepage_dir(hpi->hugepage_sz); | |
304 | ||
305 | /* first, check if we have a mountpoint */ | |
306 | if (hpi->hugedir == NULL) { | |
307 | uint32_t num_pages; | |
308 | ||
309 | num_pages = get_num_hugepages(dirent->d_name); | |
310 | if (num_pages > 0) | |
311 | RTE_LOG(NOTICE, EAL, | |
312 | "%" PRIu32 " hugepages of size " | |
313 | "%" PRIu64 " reserved, but no mounted " | |
314 | "hugetlbfs found for that size\n", | |
315 | num_pages, hpi->hugepage_sz); | |
316 | continue; | |
317 | } | |
318 | ||
319 | /* try to obtain a writelock */ | |
320 | hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY); | |
321 | ||
322 | /* if blocking lock failed */ | |
323 | if (flock(hpi->lock_descriptor, LOCK_EX) == -1) { | |
324 | RTE_LOG(CRIT, EAL, | |
325 | "Failed to lock hugepage directory!\n"); | |
326 | break; | |
327 | } | |
328 | /* clear out the hugepages dir from unused pages */ | |
329 | if (clear_hugedir(hpi->hugedir) == -1) | |
330 | break; | |
331 | ||
332 | /* for now, put all pages into socket 0, | |
333 | * later they will be sorted */ | |
334 | hpi->num_pages[0] = get_num_hugepages(dirent->d_name); | |
335 | ||
336 | #ifndef RTE_ARCH_64 | |
337 | /* for 32-bit systems, limit number of hugepages to | |
338 | * 1GB per page size */ | |
339 | hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0], | |
340 | RTE_PGSIZE_1G / hpi->hugepage_sz); | |
341 | #endif | |
342 | ||
343 | num_sizes++; | |
344 | } | |
345 | closedir(dir); | |
346 | ||
347 | /* something went wrong, and we broke from the for loop above */ | |
348 | if (dirent != NULL) | |
349 | return -1; | |
350 | ||
351 | internal_config.num_hugepage_sizes = num_sizes; | |
352 | ||
353 | /* sort the page directory entries by size, largest to smallest */ | |
354 | qsort(&internal_config.hugepage_info[0], num_sizes, | |
355 | sizeof(internal_config.hugepage_info[0]), compare_hpi); | |
356 | ||
357 | /* now we have all info, check we have at least one valid size */ | |
358 | for (i = 0; i < num_sizes; i++) | |
359 | if (internal_config.hugepage_info[i].hugedir != NULL && | |
360 | internal_config.hugepage_info[i].num_pages[0] > 0) | |
361 | return 0; | |
362 | ||
363 | /* no valid hugepage mounts available, return error */ | |
364 | return -1; | |
365 | } |