4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 #include <sys/types.h>
45 #include <sys/queue.h>
50 #include <sys/ioctl.h>
54 #include <rte_memory.h>
55 #include <rte_memzone.h>
56 #include <rte_launch.h>
58 #include <rte_eal_memconfig.h>
59 #include <rte_per_lcore.h>
60 #include <rte_lcore.h>
61 #include <rte_common.h>
62 #include <rte_string_fns.h>
64 #include "eal_private.h"
65 #include "eal_internal_cfg.h"
66 #include "eal_filesystem.h"
67 #include <exec-env/rte_dom0_common.h>
69 #define PAGE_SIZE RTE_PGSIZE_4K
70 #define DEFAUL_DOM0_NAME "dom0-mem"
72 static int xen_fd
= -1;
73 static const char sys_dir_path
[] = "/sys/kernel/mm/dom0-mm/memsize-mB";
76 * Try to mmap *size bytes of /dev/zero. If that succeeds, return the
77 * pointer to the mmap'd area and leave *size unmodified. Otherwise, retry
78 * with a smaller zone: decrease *size by mem_size until it reaches
79 * 0, in which case NULL is returned. Note: the returned address is
80 * aligned to a multiple of mem_size.
83 xen_get_virtual_area(size_t *size
, size_t mem_size
)
89 RTE_LOG(DEBUG
, EAL
, "Ask a virtual area of 0x%zu bytes\n", *size
);
91 fd
= open("/dev/zero", O_RDONLY
);
93 RTE_LOG(ERR
, EAL
, "Cannot open /dev/zero\n");
97 addr
= mmap(NULL
, (*size
) + mem_size
, PROT_READ
,
99 if (addr
== MAP_FAILED
)
101 } while (addr
== MAP_FAILED
&& *size
> 0);
103 if (addr
== MAP_FAILED
) {
105 RTE_LOG(ERR
, EAL
, "Cannot get a virtual area\n");
109 munmap(addr
, (*size
) + mem_size
);
112 /* align addr to a mem_size boundary */
113 aligned_addr
= (uintptr_t)addr
;
114 aligned_addr
= RTE_ALIGN_CEIL(aligned_addr
, mem_size
);
115 addr
= (void *)(aligned_addr
);
117 RTE_LOG(DEBUG
, EAL
, "Virtual area found at %p (size = 0x%zx)\n",
124 * Get the memory size configuration from the
125 * /sys/kernel/mm/dom0-mm/memsize-mB/memsize file; the size unit is MB.
128 get_xen_memory_size(void)
131 unsigned long mem_size
= 0;
132 static const char *file_name
;
134 file_name
= "memsize";
135 snprintf(path
, sizeof(path
), "%s/%s",
136 sys_dir_path
, file_name
);
138 if (eal_parse_sysfs_value(path
, &mem_size
) < 0)
142 rte_exit(EXIT_FAILURE
,"XEN-DOM0:the %s/%s was not"
143 " configured.\n",sys_dir_path
, file_name
);
145 rte_exit(EXIT_FAILURE
,"XEN-DOM0:the %s/%s must be"
146 " even number.\n",sys_dir_path
, file_name
);
148 if (mem_size
> DOM0_CONFIG_MEMSIZE
)
149 rte_exit(EXIT_FAILURE
,"XEN-DOM0:the %s/%s should not be larger"
150 " than %d mB\n",sys_dir_path
, file_name
, DOM0_CONFIG_MEMSIZE
);
156 * Calculate the MFN in Xen Dom0 from a physical address.
159 rte_xen_mem_phy2mch(int32_t memseg_id
, const phys_addr_t phy_addr
)
162 uint64_t mfn
, mfn_offset
;
163 struct rte_mem_config
*mcfg
= rte_eal_get_configuration()->mem_config
;
164 struct rte_memseg
*memseg
= mcfg
->memseg
;
166 /* find the memory segment owning the physical address */
167 if (memseg_id
== -1) {
168 for (i
= 0; i
< RTE_MAX_MEMSEG
; i
++) {
169 if ((phy_addr
>= memseg
[i
].phys_addr
) &&
170 (phy_addr
< memseg
[i
].phys_addr
+
177 return RTE_BAD_PHYS_ADDR
;
180 mfn_id
= (phy_addr
- memseg
[memseg_id
].phys_addr
) / RTE_PGSIZE_2M
;
182 /* the MFNs are contiguous within each 2M region */
183 mfn_offset
= (phy_addr
- memseg
[memseg_id
].phys_addr
) %
184 RTE_PGSIZE_2M
/ PAGE_SIZE
;
185 mfn
= mfn_offset
+ memseg
[memseg_id
].mfn
[mfn_id
];
187 /** return machine address */
188 return mfn
* PAGE_SIZE
+ phy_addr
% PAGE_SIZE
;
192 rte_xen_dom0_memory_init(void)
194 void *vir_addr
, *vma_addr
= NULL
;
196 uint32_t i
, requested
, mem_size
, memseg_idx
, num_memseg
= 0;
198 struct memory_info meminfo
;
199 struct memseg_info seginfo
[RTE_MAX_MEMSEG
];
200 int flags
, page_size
= getpagesize();
201 struct rte_mem_config
*mcfg
= rte_eal_get_configuration()->mem_config
;
202 struct rte_memseg
*memseg
= mcfg
->memseg
;
203 uint64_t total_mem
= internal_config
.memory
;
205 memset(seginfo
, 0, sizeof(seginfo
));
206 memset(&meminfo
, 0, sizeof(struct memory_info
));
208 mem_size
= get_xen_memory_size();
209 requested
= (unsigned) (total_mem
/ 0x100000);
210 if (requested
> mem_size
)
211 /* if we didn't satisfy total memory requirements */
212 rte_exit(EXIT_FAILURE
,"Not enough memory available! Requested: %uMB,"
213 " available: %uMB\n", requested
, mem_size
);
214 else if (total_mem
!= 0)
215 mem_size
= requested
;
217 /* Check FD and open once */
219 xen_fd
= open(DOM0_MM_DEV
, O_RDWR
);
221 RTE_LOG(ERR
, EAL
, "Can not open %s\n",DOM0_MM_DEV
);
226 meminfo
.size
= mem_size
;
228 /* construct memory management name for Dom0 */
229 snprintf(meminfo
.name
, DOM0_NAME_MAX
, "%s-%s",
230 internal_config
.hugefile_prefix
, DEFAUL_DOM0_NAME
);
232 /* Notify kernel driver to allocate memory */
233 ret
= ioctl(xen_fd
, RTE_DOM0_IOCTL_PREPARE_MEMSEG
, &meminfo
);
235 RTE_LOG(ERR
, EAL
, "XEN DOM0:failed to get memory\n");
240 /* Get number of memory segment from driver */
241 ret
= ioctl(xen_fd
, RTE_DOM0_IOCTL_GET_NUM_MEMSEG
, &num_memseg
);
243 RTE_LOG(ERR
, EAL
, "XEN DOM0:failed to get memseg count.\n");
248 if(num_memseg
> RTE_MAX_MEMSEG
){
249 RTE_LOG(ERR
, EAL
, "XEN DOM0: the memseg count %d is greater"
250 " than max memseg %d.\n",num_memseg
, RTE_MAX_MEMSEG
);
255 /* get information for all memory segments */
256 ret
= ioctl(xen_fd
, RTE_DOM0_IOCTL_GET_MEMSEG_INFO
, seginfo
);
258 RTE_LOG(ERR
, EAL
, "XEN DOM0:failed to get memseg info.\n");
263 /* map all memory segments to contiguous user space */
264 for (memseg_idx
= 0; memseg_idx
< num_memseg
; memseg_idx
++)
266 vma_len
= seginfo
[memseg_idx
].size
;
269 * get the biggest virtual memory area up to vma_len. If it fails,
270 * vma_addr is NULL, so let the kernel provide the address.
272 vma_addr
= xen_get_virtual_area(&vma_len
, RTE_PGSIZE_2M
);
273 if (vma_addr
== NULL
) {
275 vma_len
= RTE_PGSIZE_2M
;
277 flags
= MAP_SHARED
| MAP_FIXED
;
279 seginfo
[memseg_idx
].size
= vma_len
;
280 vir_addr
= mmap(vma_addr
, seginfo
[memseg_idx
].size
,
281 PROT_READ
|PROT_WRITE
, flags
, xen_fd
,
282 memseg_idx
* page_size
);
283 if (vir_addr
== MAP_FAILED
) {
284 RTE_LOG(ERR
, EAL
, "XEN DOM0:Could not mmap %s\n",
290 memseg
[memseg_idx
].addr
= vir_addr
;
291 memseg
[memseg_idx
].phys_addr
= page_size
*
292 seginfo
[memseg_idx
].pfn
;
293 memseg
[memseg_idx
].len
= seginfo
[memseg_idx
].size
;
294 for ( i
= 0; i
< seginfo
[memseg_idx
].size
/ RTE_PGSIZE_2M
; i
++)
295 memseg
[memseg_idx
].mfn
[i
] = seginfo
[memseg_idx
].mfn
[i
];
297 /* MFNs are contiguous within 2M, so assume that the page size is 2M */
298 memseg
[memseg_idx
].hugepage_sz
= RTE_PGSIZE_2M
;
300 memseg
[memseg_idx
].nchannel
= mcfg
->nchannel
;
301 memseg
[memseg_idx
].nrank
= mcfg
->nrank
;
303 /* NUMA is not supported in Xen Dom0, so only set socket 0 */
304 memseg
[memseg_idx
].socket_id
= 0;
317 * This creates the memory mappings in the secondary process to match those of
318 * the primary process. It goes through each memory segment in the DPDK runtime
319 * configuration, mapping them in order to form a contiguous block in the
320 * virtual memory space.
323 rte_xen_dom0_memory_attach(void)
325 const struct rte_mem_config
*mcfg
;
326 unsigned s
= 0; /* s used to track the segment number */
330 char name
[DOM0_NAME_MAX
] = {0};
331 int page_size
= getpagesize();
333 mcfg
= rte_eal_get_configuration()->mem_config
;
335 /* Check FD and open once */
337 xen_fd
= open(DOM0_MM_DEV
, O_RDWR
);
339 RTE_LOG(ERR
, EAL
, "Can not open %s\n",DOM0_MM_DEV
);
344 /* construct memory management name for Dom0 */
345 snprintf(name
, DOM0_NAME_MAX
, "%s-%s",
346 internal_config
.hugefile_prefix
, DEFAUL_DOM0_NAME
);
347 /* attach to memory segments of primary process */
348 ret
= ioctl(xen_fd
, RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG
, name
);
350 RTE_LOG(ERR
, EAL
,"attach memory segments fail.\n");
354 /* map all segments into memory to make sure we get the addrs */
355 for (s
= 0; s
< RTE_MAX_MEMSEG
; ++s
) {
358 * the first memory segment with len==0 is the one that
359 * follows the last valid segment.
361 if (mcfg
->memseg
[s
].len
== 0)
364 vir_addr
= mmap(mcfg
->memseg
[s
].addr
, mcfg
->memseg
[s
].len
,
365 PROT_READ
|PROT_WRITE
, MAP_SHARED
|MAP_FIXED
, xen_fd
,
367 if (vir_addr
== MAP_FAILED
) {
368 RTE_LOG(ERR
, EAL
, "Could not mmap %llu bytes "
369 "in %s to requested address [%p]\n",
370 (unsigned long long)mcfg
->memseg
[s
].len
, DOM0_MM_DEV
,
371 mcfg
->memseg
[s
].addr
);