/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <sys/queue.h>

#include <rte_fbarray.h>
#include <rte_memory.h>
#include <rte_eal_memconfig.h>
#include <rte_errno.h>

#include "eal_memalloc.h"
#include "eal_private.h"
#include "eal_internal_cfg.h"
#include "malloc_heap.h"
/*
 * Try to mmap *size bytes of anonymous memory. If it is successful, return
 * the pointer to the mmap'd area and keep *size unmodified. Otherwise, retry
 * with a smaller zone: decrease *size by page_sz until it reaches 0, in which
 * case return NULL. Note: this function returns an address which is a
 * multiple of the requested page size.
 */
#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"

static void *next_baseaddr;
static uint64_t system_page_sz;
/*
 * The Linux kernel uses a really high address as the starting address for
 * serving mmap calls. If there are addressing limitations and IOVA mode is
 * VA, this starting address is likely too high for those devices. However,
 * it is possible to use a lower address in the process virtual address space,
 * as with 64 bits there is a lot of available space.
 *
 * Currently known limitations are 39 or 40 bits. Setting the starting address
 * at 4GB implies there are 508GB or 1020GB for mapping the available
 * hugepages. This is likely enough for most systems, although a device with
 * addressing limitations should call rte_mem_check_dma_mask to ensure all
 * memory is within the supported range.
 */
static uint64_t baseaddr = 0x100000000;
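
/*
 * Worked example of the figures above (illustrative): a 39-bit limit covers
 * 2^39 bytes = 512GB, so starting at the 4GB base address leaves
 * 512GB - 4GB = 508GB for hugepage mappings; a 40-bit limit covers 1024GB,
 * leaving 1024GB - 4GB = 1020GB.
 */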
#define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5
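
/*
 * Illustrative sketch of how the helper below might be called (not part of
 * the original file; "mem_sz" is a hypothetical size and RTE_PGSIZE_2M is
 * assumed from rte_memory.h): reserve an address range aligned to 2MB pages,
 * allowing it to shrink if the full size cannot be mapped.
 *
 *	size_t mem_sz = 1U << 30;
 *	void *va = eal_get_virtual_area(NULL, &mem_sz, RTE_PGSIZE_2M,
 *			EAL_VIRTUAL_AREA_ADDR_IS_HINT |
 *			EAL_VIRTUAL_AREA_ALLOW_SHRINK, 0);
 *	if (va != NULL)
 *		printf("reserved %zu bytes at %p\n", mem_sz, va);
 */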
eal_get_virtual_area(void *requested_addr, size_t *size,
		size_t page_sz, int flags, int mmap_flags)

	bool addr_is_hint, allow_shrink, unmap, no_align;
	void *mapped_addr, *aligned_addr;

	if (system_page_sz == 0)
		system_page_sz = sysconf(_SC_PAGESIZE);

	mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;

	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);

	addr_is_hint = (flags & EAL_VIRTUAL_AREA_ADDR_IS_HINT) > 0;
	allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0;
	unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0;
	if (next_baseaddr == NULL && internal_config.base_virtaddr != 0 &&
			rte_eal_process_type() == RTE_PROC_PRIMARY)
		next_baseaddr = (void *) internal_config.base_virtaddr;

	if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 &&
			rte_eal_process_type() == RTE_PROC_PRIMARY)
		next_baseaddr = (void *) baseaddr;

	if (requested_addr == NULL && next_baseaddr != NULL) {
		requested_addr = next_baseaddr;
		requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
	/* we don't need alignment of resulting pointer in the following cases:
	 *
	 * 1. page size is equal to system page size
	 * 2. we have a requested address, and it is page-aligned, and we will
	 *    be discarding the address if we get a different one.
	 *
	 * for all other cases, alignment is potentially necessary.
	 */
	no_align = (requested_addr != NULL &&
			requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) &&
			!addr_is_hint) ||
			page_sz == system_page_sz;
	do {
		map_sz = no_align ? *size : *size + page_sz;
		if (map_sz > SIZE_MAX) {
			RTE_LOG(ERR, EAL, "Map size too big\n");

		mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_READ,
				mmap_flags, -1, 0);
		if (mapped_addr == MAP_FAILED && allow_shrink)
			*size -= page_sz;
		if (mapped_addr != MAP_FAILED && addr_is_hint &&
				mapped_addr != requested_addr) {
			next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz);
			if (try <= MAX_MMAP_WITH_DEFINED_ADDR_TRIES) {
				/* hint was not used. Try with another offset */
				munmap(mapped_addr, map_sz);
				mapped_addr = MAP_FAILED;
				requested_addr = next_baseaddr;
			}
		}
	} while ((allow_shrink || addr_is_hint) &&
			mapped_addr == MAP_FAILED && *size > 0);
	/* align resulting address - if map failed, we will ignore the value
	 * anyway, so no need to add additional checks.
	 */
	aligned_addr = no_align ? mapped_addr :
			RTE_PTR_ALIGN(mapped_addr, page_sz);
	if (*size == 0) {
		RTE_LOG(ERR, EAL, "Cannot get a virtual area of any size: %s\n",
			strerror(errno));
	} else if (mapped_addr == MAP_FAILED) {
		RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
			strerror(errno));
		/* pass errno up the call chain */
		rte_errno = errno;
	} else if (requested_addr != NULL && !addr_is_hint &&
			aligned_addr != requested_addr) {
		RTE_LOG(ERR, EAL, "Cannot get a virtual area at requested address: %p (got %p)\n",
			requested_addr, aligned_addr);
		munmap(mapped_addr, map_sz);
		rte_errno = EADDRNOTAVAIL;
	} else if (requested_addr != NULL && addr_is_hint &&
			aligned_addr != requested_addr) {
		RTE_LOG(WARNING, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n",
			requested_addr, aligned_addr);
		RTE_LOG(WARNING, EAL, "   This may cause issues with mapping memory into secondary processes\n");
	} else if (next_baseaddr != NULL) {
		next_baseaddr = RTE_PTR_ADD(aligned_addr, *size);
	}

	RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
		aligned_addr, *size);
	if (unmap) {
		munmap(mapped_addr, map_sz);
	} else if (!no_align) {
		void *map_end, *aligned_end;
		size_t before_len, after_len;

		/* when we reserve space with alignment, we add alignment to
		 * mapping size. On 32-bit, if 1GB alignment was requested, this
		 * would waste 1GB of address space, which is a luxury we cannot
		 * afford. So, if alignment was performed, check if any unneeded
		 * address space can be unmapped back.
		 */

		map_end = RTE_PTR_ADD(mapped_addr, (size_t)map_sz);
		aligned_end = RTE_PTR_ADD(aligned_addr, *size);

		/* unmap space before aligned mmap address */
		before_len = RTE_PTR_DIFF(aligned_addr, mapped_addr);
		munmap(mapped_addr, before_len);

		/* unmap space after aligned end mmap address */
		after_len = RTE_PTR_DIFF(map_end, aligned_end);
		munmap(aligned_end, after_len);
	}
static struct rte_memseg *
virt2memseg(const void *addr, const struct rte_memseg_list *msl)

	const struct rte_fbarray *arr;

	/* a memseg list was specified, check if it's the right one */
	start = msl->base_va;
	end = RTE_PTR_ADD(start, msl->len);

	if (addr < start || addr >= end)
		return NULL;

	/* now, calculate index */
	arr = &msl->memseg_arr;
	ms_idx = RTE_PTR_DIFF(addr, msl->base_va) / msl->page_sz;
	return rte_fbarray_get(arr, ms_idx);
static struct rte_memseg_list *
virt2memseg_list(const void *addr)

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *msl;

	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
		msl = &mcfg->memsegs[msl_idx];

		start = msl->base_va;
		end = RTE_PTR_ADD(start, msl->len);
		if (addr >= start && addr < end)
			break;
	}
	/* if we didn't find our memseg list */
	if (msl_idx == RTE_MAX_MEMSEG_LISTS)
		return NULL;
__rte_experimental struct rte_memseg_list *
rte_mem_virt2memseg_list(const void *addr)
{
	return virt2memseg_list(addr);
}
find_virt(const struct rte_memseg_list *msl __rte_unused,
		const struct rte_memseg *ms, void *arg)

	struct virtiova *vi = arg;

	if (vi->iova >= ms->iova && vi->iova < (ms->iova + ms->len)) {
		size_t offset = vi->iova - ms->iova;
		vi->virt = RTE_PTR_ADD(ms->addr, offset);

find_virt_legacy(const struct rte_memseg_list *msl __rte_unused,
		const struct rte_memseg *ms, size_t len, void *arg)

	struct virtiova *vi = arg;

	if (vi->iova >= ms->iova && vi->iova < (ms->iova + len)) {
		size_t offset = vi->iova - ms->iova;
		vi->virt = RTE_PTR_ADD(ms->addr, offset);
__rte_experimental void *
rte_mem_iova2virt(rte_iova_t iova)

	memset(&vi, 0, sizeof(vi));

	/* for legacy mem, we can get away with scanning VA-contiguous segments,
	 * as we know they are PA-contiguous as well
	 */
	if (internal_config.legacy_mem)
		rte_memseg_contig_walk(find_virt_legacy, &vi);
	else
		rte_memseg_walk(find_virt, &vi);
__rte_experimental struct rte_memseg *
rte_mem_virt2memseg(const void *addr, const struct rte_memseg_list *msl)
{
	return virt2memseg(addr, msl != NULL ? msl :
			rte_mem_virt2memseg_list(addr));
}
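
/*
 * Illustrative sketch (not part of the original file): round-tripping a
 * virtual address through the lookups above; "buf" is a hypothetical pointer
 * into DPDK-managed memory.
 *
 *	const struct rte_memseg *ms = rte_mem_virt2memseg(buf, NULL);
 *	if (ms != NULL) {
 *		rte_iova_t iova = ms->iova + RTE_PTR_DIFF(buf, ms->addr);
 *		void *virt = rte_mem_iova2virt(iova);   // virt == buf
 *	}
 */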
physmem_size(const struct rte_memseg_list *msl, void *arg)

	uint64_t *total_len = arg;

	*total_len += msl->memseg_arr.count * msl->page_sz;

/* get the total size of memory */
rte_eal_get_physmem_size(void)

	uint64_t total_len = 0;

	rte_memseg_list_walk(physmem_size, &total_len);
dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
		void *arg)

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int msl_idx, ms_idx, fd;
	FILE *f = arg;

	msl_idx = msl - mcfg->memsegs;
	if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
		return -1;

	ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);

	fd = eal_memalloc_get_seg_fd(msl_idx, ms_idx);
	fprintf(f, "Segment %i-%i: IOVA:0x%"PRIx64", len:%zu, "
			"virt:%p, socket_id:%"PRId32", "
			"hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
			"nrank:%"PRIx32" fd:%i\n",
			msl_idx, ms_idx,
			ms->iova, ms->len, ms->addr, ms->socket_id,
			ms->hugepage_sz, ms->nchannel, ms->nrank, fd);
/*
 * Defined here because it is declared in rte_memory.h, but the actual
 * implementation is in eal_common_memalloc.c, like all other memalloc
 * internals.
 */
int __rte_experimental
rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
		void *arg)

	/* FreeBSD boots with legacy mem enabled by default */
	if (internal_config.legacy_mem) {
		RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n");
		rte_errno = ENOTSUP;
		return -1;
	}
	return eal_memalloc_mem_event_callback_register(name, clb, arg);
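
/*
 * Illustrative sketch of a caller registering a memory event callback (not
 * part of this file; assumes the rte_mem_event_callback_t signature and
 * event values declared in rte_memory.h):
 *
 *	static void
 *	my_mem_event_cb(enum rte_mem_event event, const void *addr, size_t len,
 *			void *arg __rte_unused)
 *	{
 *		if (event == RTE_MEM_EVENT_ALLOC)
 *			printf("new memory at %p, len %zu\n", addr, len);
 *	}
 *
 *	rte_mem_event_callback_register("my-driver", my_mem_event_cb, NULL);
 */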
int __rte_experimental
rte_mem_event_callback_unregister(const char *name, void *arg)

	/* FreeBSD boots with legacy mem enabled by default */
	if (internal_config.legacy_mem) {
		RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n");
		rte_errno = ENOTSUP;
		return -1;
	}
	return eal_memalloc_mem_event_callback_unregister(name, arg);
int __rte_experimental
rte_mem_alloc_validator_register(const char *name,
		rte_mem_alloc_validator_t clb, int socket_id, size_t limit)

	/* FreeBSD boots with legacy mem enabled by default */
	if (internal_config.legacy_mem) {
		RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n");
		rte_errno = ENOTSUP;
		return -1;
	}
	return eal_memalloc_mem_alloc_validator_register(name, clb, socket_id,
			limit);
int __rte_experimental
rte_mem_alloc_validator_unregister(const char *name, int socket_id)

	/* FreeBSD boots with legacy mem enabled by default */
	if (internal_config.legacy_mem) {
		RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n");
		rte_errno = ENOTSUP;
		return -1;
	}
	return eal_memalloc_mem_alloc_validator_unregister(name, socket_id);
/* Dump the physical memory layout on console */
rte_dump_physmem_layout(FILE *f)

	rte_memseg_walk(dump_memseg, f);
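
/*
 * Illustrative call (not part of the original file): print the current
 * segment layout to the console from application debug code.
 *
 *	rte_dump_physmem_layout(stdout);
 */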
check_iova(const struct rte_memseg_list *msl __rte_unused,
		const struct rte_memseg *ms, void *arg)

	uint64_t *mask = arg;

	/* higher address within segment */
	iova = (ms->iova + ms->len) - 1;

	RTE_LOG(DEBUG, EAL, "memseg iova %"PRIx64", len %zx, out of range\n",
			ms->iova, ms->len);

	RTE_LOG(DEBUG, EAL, "\tusing dma mask %"PRIx64"\n", *mask);
#define MAX_DMA_MASK_BITS 63

/* check memseg iovas are within the required range based on dma mask */
static int __rte_experimental
check_dma_mask(uint8_t maskbits, bool thread_unsafe)

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;

	/* Sanity check: we only check that the width can be managed with
	 * 64-bit variables. Any higher value is likely wrong. */
	if (maskbits > MAX_DMA_MASK_BITS) {
		RTE_LOG(ERR, EAL, "wrong dma mask size %u (Max: %u)\n",
				maskbits, MAX_DMA_MASK_BITS);
		return -1;
	}

	/* create dma mask */
	mask = ~((1ULL << maskbits) - 1);

	if (thread_unsafe)
		ret = rte_memseg_walk_thread_unsafe(check_iova, &mask);
	else
		ret = rte_memseg_walk(check_iova, &mask);
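
/*
 * Worked example for the mask above (illustrative): with maskbits = 40,
 * mask = ~((1ULL << 40) - 1) = 0xFFFFFF0000000000, so a segment passes the
 * check only if its highest IOVA has no bits in common with the mask, i.e.
 * lies below 1ULL << 40.
 */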
	if (ret) {
		/*
		 * The DMA mask precludes hugepage usage.
		 * This device cannot be used and we do not need to keep
		 * the dma mask.
		 */
		return 1;
	}

	/*
	 * we need to keep the more restricted maskbit for checking
	 * potential dynamic memory allocation in the future.
	 */
	mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits :
			     RTE_MIN(mcfg->dma_maskbits, maskbits);
int __rte_experimental
rte_mem_check_dma_mask(uint8_t maskbits)
{
	return check_dma_mask(maskbits, false);
}

int __rte_experimental
rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits)
{
	return check_dma_mask(maskbits, true);
}
/*
 * Set the DMA mask to be used once memory initialization is done.
 *
 * This function should ONLY be used by code executed before the memory
 * initialization. PMDs should use rte_mem_check_dma_mask if the device has
 * addressing limitations.
 */
void __rte_experimental
rte_mem_set_dma_mask(uint8_t maskbits)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;

	mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits :
			     RTE_MIN(mcfg->dma_maskbits, maskbits);
}
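
/*
 * Illustrative sketch (not from the original file): bus code that knows about
 * an addressing limitation before memory initialization records it with
 * rte_mem_set_dma_mask(), while a PMD probed afterwards would call
 * rte_mem_check_dma_mask() instead.
 *
 *	rte_mem_set_dma_mask(39);                // before memory init
 *	...
 *	if (rte_mem_check_dma_mask(39) != 0)     // in a PMD, after memory init
 *		return -1;  // hugepages outside the device's addressable range
 */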
/* return the number of memory channels */
unsigned rte_memory_get_nchannel(void)
{
	return rte_eal_get_configuration()->mem_config->nchannel;
}

/* return the number of memory ranks */
unsigned rte_memory_get_nrank(void)
{
	return rte_eal_get_configuration()->mem_config->nrank;
}
rte_eal_memdevice_init(void)

	struct rte_config *config;

	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
		return 0;

	config = rte_eal_get_configuration();
	config->mem_config->nchannel = internal_config.force_nchannel;
	config->mem_config->nrank = internal_config.force_nrank;
/* Lock page in physical memory and prevent from swapping. */
rte_mem_lock_page(const void *virt)

	unsigned long virtual = (unsigned long)virt;
	int page_size = getpagesize();
	unsigned long aligned = (virtual & ~(page_size - 1));
	return mlock((void *)aligned, page_size);
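
/*
 * Illustrative call (not part of the original file; "buf" is a hypothetical
 * pointer): pin the page backing a buffer so it cannot be swapped out.
 *
 *	if (rte_mem_lock_page(buf) != 0)
 *		RTE_LOG(WARNING, EAL, "cannot lock page at %p\n", buf);
 */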
int __rte_experimental
rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg)

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int i, ms_idx, ret = 0;

	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
		struct rte_memseg_list *msl = &mcfg->memsegs[i];
		const struct rte_memseg *ms;
		struct rte_fbarray *arr;

		if (msl->memseg_arr.count == 0)
			continue;

		arr = &msl->memseg_arr;

		ms_idx = rte_fbarray_find_next_used(arr, 0);
		while (ms_idx >= 0) {
			ms = rte_fbarray_get(arr, ms_idx);

			/* find how many more segments there are, starting with
			 * this one.
			 */
			n_segs = rte_fbarray_find_contig_used(arr, ms_idx);
			len = n_segs * msl->page_sz;

			ret = func(msl, ms, len, arg);

			ms_idx = rte_fbarray_find_next_used(arr,
					ms_idx + n_segs);
int __rte_experimental
rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;

	/* do not allow allocations/frees/init while we iterate */
	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
	ret = rte_memseg_contig_walk_thread_unsafe(func, arg);
	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
int __rte_experimental
rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int i, ms_idx, ret = 0;

	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
		struct rte_memseg_list *msl = &mcfg->memsegs[i];
		const struct rte_memseg *ms;
		struct rte_fbarray *arr;

		if (msl->memseg_arr.count == 0)
			continue;

		arr = &msl->memseg_arr;

		ms_idx = rte_fbarray_find_next_used(arr, 0);
		while (ms_idx >= 0) {
			ms = rte_fbarray_get(arr, ms_idx);
			ret = func(msl, ms, arg);

			ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
int __rte_experimental
rte_memseg_walk(rte_memseg_walk_t func, void *arg)

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;

	/* do not allow allocations/frees/init while we iterate */
	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
	ret = rte_memseg_walk_thread_unsafe(func, arg);
	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
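
/*
 * Illustrative sketch (not part of the original file): a walk callback that
 * counts segments, using the same callback shape as dump_memseg() and
 * check_iova() above.
 *
 *	static int
 *	count_cb(const struct rte_memseg_list *msl __rte_unused,
 *			const struct rte_memseg *ms __rte_unused, void *arg)
 *	{
 *		(*(unsigned int *)arg)++;
 *		return 0;	// keep walking; a nonzero value stops the walk
 *	}
 *
 *	unsigned int n_segs = 0;
 *	rte_memseg_walk(count_cb, &n_segs);
 */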
int __rte_experimental
rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg)

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;

	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
		struct rte_memseg_list *msl = &mcfg->memsegs[i];

		if (msl->base_va == NULL)
			continue;

		ret = func(msl, arg);
int __rte_experimental
rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;

	/* do not allow allocations/frees/init while we iterate */
	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
	ret = rte_memseg_list_walk_thread_unsafe(func, arg);
	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
int __rte_experimental
rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms)

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *msl;
	struct rte_fbarray *arr;
	int msl_idx, seg_idx, ret;

	msl = rte_mem_virt2memseg_list(ms->addr);
	arr = &msl->memseg_arr;

	msl_idx = msl - mcfg->memsegs;
	seg_idx = rte_fbarray_find_idx(arr, ms);

	if (!rte_fbarray_is_used(arr, seg_idx)) {
		rte_errno = ENOENT;
		return -1;
	}

	/* segment fd API is not supported for external segments */

	ret = eal_memalloc_get_seg_fd(msl_idx, seg_idx);
int __rte_experimental
rte_memseg_get_fd(const struct rte_memseg *ms)

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;

	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
	ret = rte_memseg_get_fd_thread_unsafe(ms);
	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
int __rte_experimental
rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
		size_t *offset)

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *msl;
	struct rte_fbarray *arr;
	int msl_idx, seg_idx, ret;

	if (ms == NULL || offset == NULL) {
		rte_errno = EINVAL;
		return -1;
	}

	msl = rte_mem_virt2memseg_list(ms->addr);
	arr = &msl->memseg_arr;

	msl_idx = msl - mcfg->memsegs;
	seg_idx = rte_fbarray_find_idx(arr, ms);

	if (!rte_fbarray_is_used(arr, seg_idx)) {
		rte_errno = ENOENT;
		return -1;
	}

	/* segment fd API is not supported for external segments */

	ret = eal_memalloc_get_seg_fd_offset(msl_idx, seg_idx, offset);
int __rte_experimental
rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset)

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;

	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
	ret = rte_memseg_get_fd_offset_thread_unsafe(ms, offset);
	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
int __rte_experimental
rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[],
		unsigned int n_pages, size_t page_sz)

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	unsigned int socket_id, n;

	if (va_addr == NULL || page_sz == 0 || len == 0 ||
			!rte_is_power_of_2(page_sz) ||
			RTE_ALIGN(len, page_sz) != len ||
			((len / page_sz) != n_pages && iova_addrs != NULL) ||
			!rte_is_aligned(va_addr, page_sz)) {
		rte_errno = EINVAL;
		return -1;
	}
	rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);

	/* make sure the segment doesn't already exist */
	if (malloc_heap_find_external_seg(va_addr, len) != NULL) {

	/* get next available socket ID */
	socket_id = mcfg->next_socket_id;
	if (socket_id > INT32_MAX) {
		RTE_LOG(ERR, EAL, "Cannot assign new socket ID's\n");

	/* we can create a new memseg */
	if (malloc_heap_create_external_seg(va_addr, iova_addrs, n,
			page_sz, "extmem", socket_id) == NULL) {

	/* memseg list successfully created - increment next socket ID */
	mcfg->next_socket_id++;

	rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
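
/*
 * Illustrative sketch (not part of the original file): registering a
 * hypothetical externally allocated buffer "ext_mem" of "ext_len" bytes
 * backed by 2MB pages, with no IOVA table; a secondary process would follow
 * up with rte_extmem_attach(ext_mem, ext_len).
 *
 *	if (rte_extmem_register(ext_mem, ext_len, NULL, 0, RTE_PGSIZE_2M) != 0)
 *		RTE_LOG(ERR, EAL, "cannot register external memory: %s\n",
 *			rte_strerror(rte_errno));
 */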
int __rte_experimental
rte_extmem_unregister(void *va_addr, size_t len)

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *msl;

	if (va_addr == NULL || len == 0) {
		rte_errno = EINVAL;
		return -1;
	}
	rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);

	/* find our segment */
	msl = malloc_heap_find_external_seg(va_addr, len);

	ret = malloc_heap_destroy_external_seg(msl);

	rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
sync_memory(void *va_addr, size_t len, bool attach)

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *msl;

	if (va_addr == NULL || len == 0) {
		rte_errno = EINVAL;
		return -1;
	}
	rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);

	/* find our segment */
	msl = malloc_heap_find_external_seg(va_addr, len);

	if (attach)
		ret = rte_fbarray_attach(&msl->memseg_arr);
	else
		ret = rte_fbarray_detach(&msl->memseg_arr);

	rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
int __rte_experimental
rte_extmem_attach(void *va_addr, size_t len)
{
	return sync_memory(va_addr, len, true);
}

int __rte_experimental
rte_extmem_detach(void *va_addr, size_t len)
{
	return sync_memory(va_addr, len, false);
}
/* init memory subsystem */
rte_eal_memory_init(void)

	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;

	RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n");

	/* lock mem hotplug here, to prevent races while we init */
	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);

	if (rte_eal_memseg_init() < 0)
		goto fail;

	if (eal_memalloc_init() < 0)
		goto fail;

	retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
			rte_eal_hugepage_init() :
			rte_eal_hugepage_attach();
	if (retval < 0)
		goto fail;

	if (internal_config.no_shconf == 0 && rte_eal_memdevice_init() < 0)
		goto fail;

	return 0;
fail:
	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
	return -1;