/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   Copyright(c) 2016 6WIND S.A.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
42 #include <sys/queue.h>
45 #include <rte_common.h>
47 #include <rte_debug.h>
48 #include <rte_memory.h>
49 #include <rte_memzone.h>
50 #include <rte_malloc.h>
51 #include <rte_atomic.h>
52 #include <rte_launch.h>
54 #include <rte_eal_memconfig.h>
55 #include <rte_per_lcore.h>
56 #include <rte_lcore.h>
57 #include <rte_branch_prediction.h>
58 #include <rte_errno.h>
59 #include <rte_string_fns.h>
60 #include <rte_spinlock.h>
62 #include "rte_mempool.h"
TAILQ_HEAD(rte_mempool_list, rte_tailq_entry);

static struct rte_tailq_elem rte_mempool_tailq = {
	.name = "RTE_MEMPOOL",
};
EAL_REGISTER_TAILQ(rte_mempool_tailq)
#define CACHE_FLUSHTHRESH_MULTIPLIER 1.5
#define CALC_CACHE_FLUSHTHRESH(c)	\
	((typeof(c))((c) * CACHE_FLUSHTHRESH_MULTIPLIER))
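/*
 * Illustrative note (not part of the original file): with the 1.5
 * multiplier above, a per-lcore cache configured for 512 objects gets a
 * flush threshold of 512 * 1.5 = 768. A put that grows the cache past
 * 768 objects flushes the excess back to the common pool, so the cache
 * oscillates around its nominal size instead of being emptied on every
 * overflow.
 */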
/*
 * return the greatest common divisor between a and b (fast algorithm)
 */
static unsigned get_gcd(unsigned a, unsigned b)
{
	unsigned c;

	/* standard Euclidean algorithm */
	while (b != 0) {
		c = a % b;
		a = b;
		b = c;
	}

	return a;
}
/*
 * Depending on memory configuration, object addresses are spread
 * between channels and ranks in RAM: the pool allocator will add
 * padding between objects. This function returns the new size of the
 * object.
 */
static unsigned optimize_object_size(unsigned obj_size)
{
	unsigned nrank, nchan;
	unsigned new_obj_size;

	/* get number of channels */
	nchan = rte_memory_get_nchannel();
	if (nchan == 0)
		nchan = 4;

	/* get number of ranks */
	nrank = rte_memory_get_nrank();
	if (nrank == 0)
		nrank = 1;

	/* process new object size */
	new_obj_size = (obj_size + RTE_MEMPOOL_ALIGN_MASK) / RTE_MEMPOOL_ALIGN;
	while (get_gcd(new_obj_size, nrank * nchan) != 1)
		new_obj_size++;
	return new_obj_size * RTE_MEMPOOL_ALIGN;
}
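/*
 * Illustrative example (not part of the original file), assuming a
 * 64-byte RTE_MEMPOOL_ALIGN: with 2 memory channels and 2 ranks,
 * nrank * nchan = 4. An object of 256 bytes is 4 alignment units, and
 * gcd(4, 4) = 4, so consecutive objects would always start on the same
 * channel. Bumping the size to 5 units (320 bytes) gives gcd(5, 4) = 1,
 * so consecutive objects now rotate across all channels/ranks.
 */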
static void
mempool_add_elem(struct rte_mempool *mp, void *obj, phys_addr_t physaddr)
{
	struct rte_mempool_objhdr *hdr;
	struct rte_mempool_objtlr *tlr __rte_unused;

	/* set mempool ptr in header */
	hdr = RTE_PTR_SUB(obj, sizeof(*hdr));
	hdr->mp = mp;
	hdr->physaddr = physaddr;
	STAILQ_INSERT_TAIL(&mp->elt_list, hdr, next);
	mp->populated_size++;

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	hdr->cookie = RTE_MEMPOOL_HEADER_COOKIE2;
	tlr = __mempool_get_trailer(obj);
	tlr->cookie = RTE_MEMPOOL_TRAILER_COOKIE;
#endif

	/* enqueue in ring */
	rte_mempool_ops_enqueue_bulk(mp, &obj, 1);
}
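/*
 * Layout reminder (added for clarity, not part of the original file):
 * each element is stored as [objhdr | object | objtlr], and user code
 * only ever sees the object pointer. mempool_add_elem() therefore steps
 * back by sizeof(struct rte_mempool_objhdr) to reach the header it
 * fills in above; __mempool_get_trailer() does the symmetric walk past
 * the object to reach the trailer.
 */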
/* call obj_cb() for each mempool element */
uint32_t
rte_mempool_obj_iter(struct rte_mempool *mp,
	rte_mempool_obj_cb_t *obj_cb, void *obj_cb_arg)
{
	struct rte_mempool_objhdr *hdr;
	void *obj;
	unsigned n = 0;

	STAILQ_FOREACH(hdr, &mp->elt_list, next) {
		obj = (char *)hdr + sizeof(*hdr);
		obj_cb(mp, obj_cb_arg, obj, n);
		n++;
	}

	return n;
}
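/*
 * Usage sketch (illustrative, not part of the original file): a caller
 * can pass any rte_mempool_obj_cb_t; the hypothetical callback below
 * simply zeroes every object in the pool.
 *
 *	static void
 *	zero_obj_cb(struct rte_mempool *mp, __rte_unused void *arg,
 *		void *obj, __rte_unused unsigned idx)
 *	{
 *		memset(obj, 0, mp->elt_size);
 *	}
 *
 *	...
 *	rte_mempool_obj_iter(mp, zero_obj_cb, NULL);
 */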
/* call mem_cb() for each mempool memory chunk */
uint32_t
rte_mempool_mem_iter(struct rte_mempool *mp,
	rte_mempool_mem_cb_t *mem_cb, void *mem_cb_arg)
{
	struct rte_mempool_memhdr *hdr;
	unsigned n = 0;

	STAILQ_FOREACH(hdr, &mp->mem_list, next) {
		mem_cb(mp, mem_cb_arg, hdr, n);
		n++;
	}

	return n;
}
/* get the header, trailer and total size of a mempool element. */
uint32_t
rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
	struct rte_mempool_objsz *sz)
{
	struct rte_mempool_objsz lsz;

	sz = (sz != NULL) ? sz : &lsz;

	sz->header_size = sizeof(struct rte_mempool_objhdr);
	if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0)
		sz->header_size = RTE_ALIGN_CEIL(sz->header_size,
			RTE_MEMPOOL_ALIGN);

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	sz->trailer_size = sizeof(struct rte_mempool_objtlr);
#else
	sz->trailer_size = 0;
#endif

	/* element size is 8 bytes-aligned at least */
	sz->elt_size = RTE_ALIGN_CEIL(elt_size, sizeof(uint64_t));

	/* expand trailer to next cache line */
	if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0) {
		sz->total_size = sz->header_size + sz->elt_size +
			sz->trailer_size;
		sz->trailer_size += ((RTE_MEMPOOL_ALIGN -
			(sz->total_size & RTE_MEMPOOL_ALIGN_MASK)) &
			RTE_MEMPOOL_ALIGN_MASK);
	}

	/*
	 * increase trailer to add padding between objects in order to
	 * spread them across memory channels/ranks
	 */
	if ((flags & MEMPOOL_F_NO_SPREAD) == 0) {
		unsigned new_size;

		new_size = optimize_object_size(sz->header_size + sz->elt_size +
			sz->trailer_size);
		sz->trailer_size = new_size - sz->header_size - sz->elt_size;
	}

	/* this is the size of an object, including header and trailer */
	sz->total_size = sz->header_size + sz->elt_size + sz->trailer_size;

	return sz->total_size;
}
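/*
 * Worked example (illustrative, not part of the original file), assuming
 * a 64-byte RTE_MEMPOOL_ALIGN, debug disabled and no special flags:
 * for elt_size = 100, elt_size is first rounded up to 104 (8-byte
 * alignment); header_size is padded to 64; the trailer then grows so
 * that 64 + 104 + trailer is a multiple of 64, giving trailer_size = 24
 * and total_size = 192 before the optional channel/rank spreading step.
 */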
/*
 * Calculate maximum amount of memory required to store given number of objects.
 */
size_t
rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift)
{
	size_t obj_per_page, pg_num, pg_sz;

	if (total_elt_sz == 0)
		return 0;

	if (pg_shift == 0)
		return total_elt_sz * elt_num;

	pg_sz = (size_t)1 << pg_shift;
	obj_per_page = pg_sz / total_elt_sz;
	if (obj_per_page == 0)
		return RTE_ALIGN_CEIL(total_elt_sz, pg_sz) * elt_num;

	pg_num = (elt_num + obj_per_page - 1) / obj_per_page;
	return pg_num << pg_shift;
}
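/*
 * Worked example (illustrative, not part of the original file): with
 * total_elt_sz = 192 and pg_shift = 12 (4 KB pages), obj_per_page is
 * 4096 / 192 = 21, so 1000 objects need ceil(1000 / 21) = 48 pages,
 * i.e. 48 << 12 = 196608 bytes. Objects never straddle a page boundary,
 * which is why the result is larger than 1000 * 192 = 192000 bytes.
 */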
/*
 * Calculate how much memory would be actually required with the
 * given memory footprint to store required number of elements.
 */
ssize_t
rte_mempool_xmem_usage(__rte_unused void *vaddr, uint32_t elt_num,
	size_t total_elt_sz, const phys_addr_t paddr[], uint32_t pg_num,
	uint32_t pg_shift)
{
	uint32_t elt_cnt = 0;
	phys_addr_t start, end;
	uint32_t paddr_idx;
	size_t pg_sz = (size_t)1 << pg_shift;

	/* if paddr is NULL, assume contiguous memory */
	if (paddr == NULL) {
		start = 0;
		end = pg_sz * pg_num;
		paddr_idx = pg_num;
	} else {
		start = paddr[0];
		end = paddr[0] + pg_sz;
		paddr_idx = 1;
	}
	while (elt_cnt < elt_num) {

		if (end - start >= total_elt_sz) {
			/* enough contiguous memory, add an object */
			start += total_elt_sz;
			elt_cnt++;
		} else if (paddr_idx < pg_num) {
			/* no room to store one obj, add a page */
			if (end == paddr[paddr_idx]) {
				end += pg_sz;
			} else {
				start = paddr[paddr_idx];
				end = paddr[paddr_idx] + pg_sz;
			}
			paddr_idx++;

		} else {
			/* no more page, return how many elements fit */
			return -(size_t)elt_cnt;
		}
	}

	return (size_t)paddr_idx << pg_shift;
}
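/*
 * Interpretation note (added for clarity, not part of the original
 * file): a non-negative return value is the number of bytes (in whole
 * pages) actually consumed to hold elt_num objects, while a negative
 * value means the supplied footprint is too small and its absolute
 * value is how many objects did fit. A caller can therefore size a
 * buffer with rte_mempool_xmem_size() and double-check it here.
 */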
/* free a memchunk allocated with rte_memzone_reserve() */
static void
rte_mempool_memchunk_mz_free(__rte_unused struct rte_mempool_memhdr *memhdr,
	void *opaque)
{
	const struct rte_memzone *mz = opaque;
	rte_memzone_free(mz);
}
/* Free memory chunks used by a mempool. Objects must be in pool */
static void
rte_mempool_free_memchunks(struct rte_mempool *mp)
{
	struct rte_mempool_memhdr *memhdr;
	void *elt;

	while (!STAILQ_EMPTY(&mp->elt_list)) {
		rte_mempool_ops_dequeue_bulk(mp, &elt, 1);
		(void)elt;
		STAILQ_REMOVE_HEAD(&mp->elt_list, next);
		mp->populated_size--;
	}

	while (!STAILQ_EMPTY(&mp->mem_list)) {
		memhdr = STAILQ_FIRST(&mp->mem_list);
		STAILQ_REMOVE_HEAD(&mp->mem_list, next);
		if (memhdr->free_cb != NULL)
			memhdr->free_cb(memhdr, memhdr->opaque);
		rte_free(memhdr);
		mp->nb_mem_chunks--;
	}
}
/* Add objects in the pool, using a physically contiguous memory
 * zone. Return the number of objects added, or a negative value
 * on error.
 */
int
rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
	phys_addr_t paddr, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
	void *opaque)
{
	unsigned total_elt_sz;
	unsigned i = 0;
	size_t off;
	struct rte_mempool_memhdr *memhdr;
	int ret;

	/* create the internal ring if not already done */
	if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
		ret = rte_mempool_ops_alloc(mp);
		if (ret != 0)
			return ret;
		mp->flags |= MEMPOOL_F_POOL_CREATED;
	}

	/* mempool is already populated */
	if (mp->populated_size >= mp->size)
		return -ENOSPC;

	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;

	memhdr = rte_zmalloc("MEMPOOL_MEMHDR", sizeof(*memhdr), 0);
	if (memhdr == NULL)
		return -ENOMEM;

	memhdr->mp = mp;
	memhdr->addr = vaddr;
	memhdr->phys_addr = paddr;
	memhdr->len = len;
	memhdr->free_cb = free_cb;
	memhdr->opaque = opaque;

	if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN)
		off = RTE_PTR_ALIGN_CEIL(vaddr, 8) - vaddr;
	else
		off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_CACHE_LINE_SIZE) - vaddr;

	while (off + total_elt_sz <= len && mp->populated_size < mp->size) {
		off += mp->header_size;
		if (paddr == RTE_BAD_PHYS_ADDR)
			mempool_add_elem(mp, (char *)vaddr + off,
				RTE_BAD_PHYS_ADDR);
		else
			mempool_add_elem(mp, (char *)vaddr + off, paddr + off);
		off += mp->elt_size + mp->trailer_size;
		i++;
	}

	/* not enough room to store one object */
	if (i == 0)
		return -EINVAL;

	STAILQ_INSERT_TAIL(&mp->mem_list, memhdr, next);
	mp->nb_mem_chunks++;
	return i;
}
/* Add objects in the pool, using a table of physical pages. Return the
 * number of objects added, or a negative value on error.
 */
int
rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr,
	const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
	rte_mempool_memchunk_free_cb_t *free_cb, void *opaque)
{
	uint32_t i, n;
	int ret, cnt = 0;
	size_t pg_sz = (size_t)1 << pg_shift;

	/* mempool must not be populated */
	if (mp->nb_mem_chunks != 0)
		return -EEXIST;

	if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG)
		return rte_mempool_populate_phys(mp, vaddr, RTE_BAD_PHYS_ADDR,
			pg_num * pg_sz, free_cb, opaque);

	for (i = 0; i < pg_num && mp->populated_size < mp->size; i += n) {

		/* populate with the largest group of contiguous pages */
		for (n = 1; (i + n) < pg_num &&
			paddr[i + n - 1] + pg_sz == paddr[i + n]; n++)
			;

		ret = rte_mempool_populate_phys(mp, vaddr + i * pg_sz,
			paddr[i], n * pg_sz, free_cb, opaque);
		if (ret < 0) {
			rte_mempool_free_memchunks(mp);
			return ret;
		}
		/* no need to call the free callback for next chunks */
		free_cb = NULL;
		cnt += ret;
	}
	return cnt;
}
/* Populate the mempool with a virtual area. Return the number of
 * objects added, or a negative value on error.
 */
int
rte_mempool_populate_virt(struct rte_mempool *mp, char *addr,
	size_t len, size_t pg_sz, rte_mempool_memchunk_free_cb_t *free_cb,
	void *opaque)
{
	phys_addr_t paddr;
	size_t off, phys_len;
	int ret, cnt = 0;

	/* mempool must not be populated */
	if (mp->nb_mem_chunks != 0)
		return -EEXIST;
	/* address and len must be page-aligned */
	if (RTE_PTR_ALIGN_CEIL(addr, pg_sz) != addr)
		return -EINVAL;
	if (RTE_ALIGN_CEIL(len, pg_sz) != len)
		return -EINVAL;

	if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG)
		return rte_mempool_populate_phys(mp, addr, RTE_BAD_PHYS_ADDR,
			len, free_cb, opaque);

	for (off = 0; off + pg_sz <= len &&
		     mp->populated_size < mp->size; off += phys_len) {

		paddr = rte_mem_virt2phy(addr + off);
		/* required for xen_dom0 to get the machine address */
		paddr = rte_mem_phy2mch(-1, paddr);

		if (paddr == RTE_BAD_PHYS_ADDR) {
			ret = -EINVAL;
			goto fail;
		}

		/* populate with the largest group of contiguous pages */
		for (phys_len = pg_sz; off + phys_len < len; phys_len += pg_sz) {
			phys_addr_t paddr_tmp;

			paddr_tmp = rte_mem_virt2phy(addr + off + phys_len);
			paddr_tmp = rte_mem_phy2mch(-1, paddr_tmp);

			if (paddr_tmp != paddr + phys_len)
				break;
		}

		ret = rte_mempool_populate_phys(mp, addr + off, paddr,
			phys_len, free_cb, opaque);
		if (ret < 0)
			goto fail;
		/* no need to call the free callback for next chunks */
		free_cb = NULL;
		cnt += ret;
	}

	return cnt;

 fail:
	rte_mempool_free_memchunks(mp);
	return ret;
}
/* Default function to populate the mempool: allocate memory in memzones,
 * and populate them. Return the number of objects added, or a negative
 * value on error.
 */
int
rte_mempool_populate_default(struct rte_mempool *mp)
{
	int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
	char mz_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;
	size_t size, total_elt_sz, align, pg_sz, pg_shift;
	phys_addr_t paddr;
	unsigned mz_id, n;
	int ret;

	/* mempool must not be populated */
	if (mp->nb_mem_chunks != 0)
		return -EEXIST;

	if (rte_xen_dom0_supported()) {
		pg_sz = RTE_PGSIZE_2M;
		pg_shift = rte_bsf32(pg_sz);
		align = pg_sz;
	} else if (rte_eal_has_hugepages()) {
		pg_shift = 0; /* not needed, zone is physically contiguous */
		pg_sz = 0;
		align = RTE_CACHE_LINE_SIZE;
	} else {
		pg_sz = getpagesize();
		pg_shift = rte_bsf32(pg_sz);
		align = pg_sz;
	}

	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
	for (mz_id = 0, n = mp->size; n > 0; mz_id++, n -= ret) {
		size = rte_mempool_xmem_size(n, total_elt_sz, pg_shift);

		ret = snprintf(mz_name, sizeof(mz_name),
			RTE_MEMPOOL_MZ_FORMAT "_%d", mp->name, mz_id);
		if (ret < 0 || ret >= (int)sizeof(mz_name)) {
			ret = -ENAMETOOLONG;
			goto fail;
		}

		mz = rte_memzone_reserve_aligned(mz_name, size,
			mp->socket_id, mz_flags, align);
		/* not enough memory, retry with the biggest zone we have */
		if (mz == NULL)
			mz = rte_memzone_reserve_aligned(mz_name, 0,
				mp->socket_id, mz_flags, align);
		if (mz == NULL) {
			ret = -rte_errno;
			goto fail;
		}

		if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG)
			paddr = RTE_BAD_PHYS_ADDR;
		else
			paddr = mz->phys_addr;

		if (rte_eal_has_hugepages() && !rte_xen_dom0_supported())
			ret = rte_mempool_populate_phys(mp, mz->addr,
				paddr, mz->len,
				rte_mempool_memchunk_mz_free,
				(void *)(uintptr_t)mz);
		else
			ret = rte_mempool_populate_virt(mp, mz->addr,
				mz->len, pg_sz,
				rte_mempool_memchunk_mz_free,
				(void *)(uintptr_t)mz);
		if (ret < 0) {
			rte_memzone_free(mz);
			goto fail;
		}
	}

	return mp->size;

 fail:
	rte_mempool_free_memchunks(mp);
	return ret;
}
/* return the memory size required for mempool objects in anonymous mem */
static size_t
get_anon_size(const struct rte_mempool *mp)
{
	size_t size, total_elt_sz, pg_sz, pg_shift;

	pg_sz = getpagesize();
	pg_shift = rte_bsf32(pg_sz);
	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
	size = rte_mempool_xmem_size(mp->size, total_elt_sz, pg_shift);

	return size;
}
/* unmap a memory zone mapped by rte_mempool_populate_anon() */
static void
rte_mempool_memchunk_anon_free(struct rte_mempool_memhdr *memhdr,
	void *opaque)
{
	munmap(opaque, get_anon_size(memhdr->mp));
}
/* populate the mempool with an anonymous mapping */
int
rte_mempool_populate_anon(struct rte_mempool *mp)
{
	size_t size;
	int ret;
	char *addr;

	/* mempool is already populated, error */
	if (!STAILQ_EMPTY(&mp->mem_list)) {
		rte_errno = EINVAL;
		return 0;
	}

	/* get chunk of virtually continuous memory */
	size = get_anon_size(mp);
	addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
		MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (addr == MAP_FAILED) {
		rte_errno = errno;
		return 0;
	}
	/* can't use MMAP_LOCKED, it does not exist on BSD */
	if (mlock(addr, size) < 0) {
		rte_errno = errno;
		munmap(addr, size);
		return 0;
	}

	ret = rte_mempool_populate_virt(mp, addr, size, getpagesize(),
		rte_mempool_memchunk_anon_free, addr);
	if (ret < 0)
		goto fail;

	return mp->populated_size;

 fail:
	rte_mempool_free_memchunks(mp);
	return 0;
}
/* free a mempool */
void
rte_mempool_free(struct rte_mempool *mp)
{
	struct rte_mempool_list *mempool_list = NULL;
	struct rte_tailq_entry *te;

	if (mp == NULL)
		return;

	mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
	/* find out tailq entry */
	TAILQ_FOREACH(te, mempool_list, next) {
		if (te->data == (void *)mp)
			break;
	}

	if (te != NULL) {
		TAILQ_REMOVE(mempool_list, te, next);
		rte_free(te);
	}
	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);

	rte_mempool_free_memchunks(mp);
	rte_mempool_ops_free(mp);
	rte_memzone_free(mp->mz);
}
/* initialize a mempool cache structure: size, length and flush threshold */
static void
mempool_cache_init(struct rte_mempool_cache *cache, uint32_t size)
{
	cache->size = size;
	cache->flushthresh = CALC_CACHE_FLUSHTHRESH(size);
	cache->len = 0;
}
/*
 * Create and initialize a cache for objects that are retrieved from and
 * returned to an underlying mempool. This structure is identical to the
 * local_cache[lcore_id] pointed to by the mempool structure.
 */
struct rte_mempool_cache *
rte_mempool_cache_create(uint32_t size, int socket_id)
{
	struct rte_mempool_cache *cache;

	if (size == 0 || size > RTE_MEMPOOL_CACHE_MAX_SIZE) {
		rte_errno = EINVAL;
		return NULL;
	}

	cache = rte_zmalloc_socket("MEMPOOL_CACHE", sizeof(*cache),
				   RTE_CACHE_LINE_SIZE, socket_id);
	if (cache == NULL) {
		RTE_LOG(ERR, MEMPOOL, "Cannot allocate mempool cache.\n");
		rte_errno = ENOMEM;
		return NULL;
	}

	mempool_cache_init(cache, size);

	return cache;
}
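/*
 * Usage sketch (illustrative, not part of the original file): a thread
 * that is not a regular DPDK lcore, and therefore has no local_cache
 * entry of its own, can maintain a private cache and release it when
 * done. The cache size (64 here) is an arbitrary example value.
 *
 *	struct rte_mempool_cache *c;
 *
 *	c = rte_mempool_cache_create(64, rte_socket_id());
 *	if (c == NULL)
 *		return;
 *	... pass "c" to the generic get/put helpers ...
 *	rte_mempool_cache_free(c);
 */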
/*
 * Free a cache. It's the responsibility of the user to make sure that any
 * remaining objects in the cache are flushed to the corresponding
 * mempool.
 */
void
rte_mempool_cache_free(struct rte_mempool_cache *cache)
{
	rte_free(cache);
}
/* create an empty mempool */
struct rte_mempool *
rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
	unsigned cache_size, unsigned private_data_size,
	int socket_id, unsigned flags)
{
	char mz_name[RTE_MEMZONE_NAMESIZE];
	struct rte_mempool_list *mempool_list;
	struct rte_mempool *mp = NULL;
	struct rte_tailq_entry *te = NULL;
	const struct rte_memzone *mz = NULL;
	size_t mempool_size;
	int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
	struct rte_mempool_objsz objsz;
	unsigned lcore_id;
	int ret;

	/* compilation-time checks */
	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
			  RTE_CACHE_LINE_MASK) != 0);
	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
			  RTE_CACHE_LINE_MASK) != 0);
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
			  RTE_CACHE_LINE_MASK) != 0);
	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, stats) &
			  RTE_CACHE_LINE_MASK) != 0);
#endif

	mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);

	/* asked cache too big */
	if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE ||
	    CALC_CACHE_FLUSHTHRESH(cache_size) > n) {
		rte_errno = EINVAL;
		return NULL;
	}

	/* "no cache align" imply "no spread" */
	if (flags & MEMPOOL_F_NO_CACHE_ALIGN)
		flags |= MEMPOOL_F_NO_SPREAD;

	/* calculate mempool object sizes. */
	if (!rte_mempool_calc_obj_size(elt_size, flags, &objsz)) {
		rte_errno = EINVAL;
		return NULL;
	}

	rte_rwlock_write_lock(RTE_EAL_MEMPOOL_RWLOCK);

	/*
	 * reserve a memory zone for this mempool: private data is
	 * cache-aligned
	 */
	private_data_size = (private_data_size +
			     RTE_MEMPOOL_ALIGN_MASK) & (~RTE_MEMPOOL_ALIGN_MASK);

	/* try to allocate tailq entry */
	te = rte_zmalloc("MEMPOOL_TAILQ_ENTRY", sizeof(*te), 0);
	if (te == NULL) {
		RTE_LOG(ERR, MEMPOOL, "Cannot allocate tailq entry!\n");
		goto exit_unlock;
	}

	mempool_size = MEMPOOL_HEADER_SIZE(mp, cache_size);
	mempool_size += private_data_size;
	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);

	ret = snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT, name);
	if (ret < 0 || ret >= (int)sizeof(mz_name)) {
		rte_errno = ENAMETOOLONG;
		goto exit_unlock;
	}

	mz = rte_memzone_reserve(mz_name, mempool_size, socket_id, mz_flags);
	if (mz == NULL)
		goto exit_unlock;

	/* init the mempool structure */
	mp = mz->addr;
	memset(mp, 0, MEMPOOL_HEADER_SIZE(mp, cache_size));
	ret = snprintf(mp->name, sizeof(mp->name), "%s", name);
	if (ret < 0 || ret >= (int)sizeof(mp->name)) {
		rte_errno = ENAMETOOLONG;
		goto exit_unlock;
	}
	mp->mz = mz;
	mp->size = n;
	mp->flags = flags;
	mp->socket_id = socket_id;
	mp->elt_size = objsz.elt_size;
	mp->header_size = objsz.header_size;
	mp->trailer_size = objsz.trailer_size;
	/* Size of default caches, zero means disabled. */
	mp->cache_size = cache_size;
	mp->private_data_size = private_data_size;
	STAILQ_INIT(&mp->elt_list);
	STAILQ_INIT(&mp->mem_list);

	/*
	 * local_cache pointer is set even if cache_size is zero.
	 * The local_cache points to just past the elt_pa[] array.
	 */
	mp->local_cache = (struct rte_mempool_cache *)
		RTE_PTR_ADD(mp, MEMPOOL_HEADER_SIZE(mp, 0));

	/* Init all default caches. */
	if (cache_size != 0) {
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
			mempool_cache_init(&mp->local_cache[lcore_id],
					   cache_size);
	}

	te->data = mp;

	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
	TAILQ_INSERT_TAIL(mempool_list, te, next);
	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
	rte_rwlock_write_unlock(RTE_EAL_MEMPOOL_RWLOCK);

	return mp;

exit_unlock:
	rte_rwlock_write_unlock(RTE_EAL_MEMPOOL_RWLOCK);
	rte_free(te);
	rte_mempool_free(mp);
	return NULL;
}
/* create the mempool */
struct rte_mempool *
rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
	unsigned cache_size, unsigned private_data_size,
	rte_mempool_ctor_t *mp_init, void *mp_init_arg,
	rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
	int socket_id, unsigned flags)
{
	struct rte_mempool *mp;

	mp = rte_mempool_create_empty(name, n, elt_size, cache_size,
		private_data_size, socket_id, flags);
	if (mp == NULL)
		return NULL;

	/*
	 * Since we have 4 combinations of the SP/SC/MP/MC examine the flags to
	 * set the correct index into the table of ops structs.
	 */
	if ((flags & MEMPOOL_F_SP_PUT) && (flags & MEMPOOL_F_SC_GET))
		rte_mempool_set_ops_byname(mp, "ring_sp_sc", NULL);
	else if (flags & MEMPOOL_F_SP_PUT)
		rte_mempool_set_ops_byname(mp, "ring_sp_mc", NULL);
	else if (flags & MEMPOOL_F_SC_GET)
		rte_mempool_set_ops_byname(mp, "ring_mp_sc", NULL);
	else
		rte_mempool_set_ops_byname(mp, "ring_mp_mc", NULL);

	/* call the mempool priv initializer */
	if (mp_init)
		mp_init(mp, mp_init_arg);

	if (rte_mempool_populate_default(mp) < 0)
		goto fail;

	/* call the object initializers */
	if (obj_init)
		rte_mempool_obj_iter(mp, obj_init, obj_init_arg);

	return mp;

 fail:
	rte_mempool_free(mp);
	return NULL;
}
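/*
 * Usage sketch (illustrative, not part of the original file): a typical
 * application-side call creating a pool of 8192 buffers of 2048 bytes
 * each with a 256-object per-lcore cache. All names and sizes below are
 * arbitrary examples, not values mandated by this library.
 *
 *	struct rte_mempool *pool;
 *
 *	pool = rte_mempool_create("example_pool", 8192, 2048,
 *		256, 0,
 *		NULL, NULL,	(no pool constructor)
 *		NULL, NULL,	(no per-object constructor)
 *		rte_socket_id(), 0);
 *	if (pool == NULL)
 *		rte_exit(EXIT_FAILURE, "cannot create mempool: %s\n",
 *			rte_strerror(rte_errno));
 */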
/*
 * Create the mempool over already allocated chunk of memory.
 * That external memory buffer can consist of physically disjoint pages.
 * Setting vaddr to NULL makes the mempool fall back to the
 * rte_mempool_create() behavior.
 */
struct rte_mempool *
rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
	unsigned cache_size, unsigned private_data_size,
	rte_mempool_ctor_t *mp_init, void *mp_init_arg,
	rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
	int socket_id, unsigned flags, void *vaddr,
	const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift)
{
	struct rte_mempool *mp = NULL;
	int ret;

	/* no virtual address supplied, use rte_mempool_create() */
	if (vaddr == NULL)
		return rte_mempool_create(name, n, elt_size, cache_size,
			private_data_size, mp_init, mp_init_arg,
			obj_init, obj_init_arg, socket_id, flags);

	/* check that we have both VA and PA */
	if (paddr == NULL) {
		rte_errno = EINVAL;
		return NULL;
	}

	/* Check that pg_shift parameter is valid. */
	if (pg_shift > MEMPOOL_PG_SHIFT_MAX) {
		rte_errno = EINVAL;
		return NULL;
	}

	mp = rte_mempool_create_empty(name, n, elt_size, cache_size,
		private_data_size, socket_id, flags);
	if (mp == NULL)
		return NULL;

	/* call the mempool priv initializer */
	if (mp_init)
		mp_init(mp, mp_init_arg);

	ret = rte_mempool_populate_phys_tab(mp, vaddr, paddr, pg_num, pg_shift,
		NULL, NULL);
	if (ret < 0 || ret != (int)mp->size)
		goto fail;

	/* call the object initializers */
	if (obj_init)
		rte_mempool_obj_iter(mp, obj_init, obj_init_arg);

	return mp;

 fail:
	rte_mempool_free(mp);
	return NULL;
}
/* Return the number of entries in the mempool */
unsigned int
rte_mempool_avail_count(const struct rte_mempool *mp)
{
	unsigned count;
	unsigned lcore_id;

	count = rte_mempool_ops_get_count(mp);

	if (mp->cache_size == 0)
		return count;

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
		count += mp->local_cache[lcore_id].len;

	/*
	 * due to race condition (access to len is not locked), the
	 * total can be greater than size... so fix the result
	 */
	if (count > mp->size)
		return mp->size;
	return count;
}
/* return the number of entries allocated from the mempool */
unsigned int
rte_mempool_in_use_count(const struct rte_mempool *mp)
{
	return mp->size - rte_mempool_avail_count(mp);
}
/* compatibility wrapper: same as rte_mempool_avail_count() */
unsigned int
rte_mempool_count(const struct rte_mempool *mp)
{
	return rte_mempool_avail_count(mp);
}
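/*
 * Note (added for clarity, not part of the original file): because
 * rte_mempool_in_use_count() is defined as size minus the available
 * count, the two counters always satisfy
 *
 *	rte_mempool_avail_count(mp) + rte_mempool_in_use_count(mp) == mp->size
 *
 * up to the transient imprecision of the unlocked per-lcore cache reads
 * mentioned in rte_mempool_avail_count().
 */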
/* dump the cache status */
static unsigned
rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
{
	unsigned lcore_id;
	unsigned count = 0;
	unsigned cache_count;

	fprintf(f, "  internal cache infos:\n");
	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);

	if (mp->cache_size == 0)
		return count;

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		cache_count = mp->local_cache[lcore_id].len;
		fprintf(f, "    cache_count[%u]=%"PRIu32"\n",
			lcore_id, cache_count);
		count += cache_count;
	}
	fprintf(f, "    total_cache_count=%u\n", count);
	return count;
}
#ifndef __INTEL_COMPILER
#pragma GCC diagnostic ignored "-Wcast-qual"
#endif
/* check and update cookies or panic (internal) */
void rte_mempool_check_cookies(const struct rte_mempool *mp,
	void * const *obj_table_const, unsigned n, int free)
{
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	struct rte_mempool_objhdr *hdr;
	struct rte_mempool_objtlr *tlr;
	uint64_t cookie;
	void *tmp;
	void *obj;
	void **obj_table;

	/* Force to drop the "const" attribute. This is done only when
	 * DEBUG is enabled */
	tmp = (void *) obj_table_const;
	obj_table = (void **) tmp;

	while (n--) {
		obj = obj_table[n];

		if (rte_mempool_from_obj(obj) != mp)
			rte_panic("MEMPOOL: object is owned by another "
				  "mempool\n");

		hdr = __mempool_get_header(obj);
		cookie = hdr->cookie;

		if (free == 0) {
			if (cookie != RTE_MEMPOOL_HEADER_COOKIE1) {
				RTE_LOG(CRIT, MEMPOOL,
					"obj=%p, mempool=%p, cookie=%" PRIx64 "\n",
					obj, (const void *) mp, cookie);
				rte_panic("MEMPOOL: bad header cookie (put)\n");
			}
			hdr->cookie = RTE_MEMPOOL_HEADER_COOKIE2;
		} else if (free == 1) {
			if (cookie != RTE_MEMPOOL_HEADER_COOKIE2) {
				RTE_LOG(CRIT, MEMPOOL,
					"obj=%p, mempool=%p, cookie=%" PRIx64 "\n",
					obj, (const void *) mp, cookie);
				rte_panic("MEMPOOL: bad header cookie (get)\n");
			}
			hdr->cookie = RTE_MEMPOOL_HEADER_COOKIE1;
		} else if (free == 2) {
			if (cookie != RTE_MEMPOOL_HEADER_COOKIE1 &&
			    cookie != RTE_MEMPOOL_HEADER_COOKIE2) {
				RTE_LOG(CRIT, MEMPOOL,
					"obj=%p, mempool=%p, cookie=%" PRIx64 "\n",
					obj, (const void *) mp, cookie);
				rte_panic("MEMPOOL: bad header cookie (audit)\n");
			}
		}
		tlr = __mempool_get_trailer(obj);
		cookie = tlr->cookie;
		if (cookie != RTE_MEMPOOL_TRAILER_COOKIE) {
			RTE_LOG(CRIT, MEMPOOL,
				"obj=%p, mempool=%p, cookie=%" PRIx64 "\n",
				obj, (const void *) mp, cookie);
			rte_panic("MEMPOOL: bad trailer cookie\n");
		}
	}
#else
	RTE_SET_USED(mp);
	RTE_SET_USED(obj_table_const);
	RTE_SET_USED(n);
	RTE_SET_USED(free);
#endif
}
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
static void
mempool_obj_audit(struct rte_mempool *mp, __rte_unused void *opaque,
	void *obj, __rte_unused unsigned idx)
{
	__mempool_check_cookies(mp, &obj, 1, 2);
}

static void
mempool_audit_cookies(struct rte_mempool *mp)
{
	unsigned num;

	num = rte_mempool_obj_iter(mp, mempool_obj_audit, NULL);
	if (num != mp->size) {
		rte_panic("rte_mempool_obj_iter(mempool=%p, size=%u) "
			"iterated only over %u elements\n",
			(void *)mp, mp->size, num);
	}
}
#else
#define mempool_audit_cookies(mp) do {} while(0)
#endif
#ifndef __INTEL_COMPILER
#pragma GCC diagnostic error "-Wcast-qual"
#endif
/* check cookies before and after objects */
static void
mempool_audit_cache(const struct rte_mempool *mp)
{
	/* check cache size consistency */
	unsigned lcore_id;

	if (mp->cache_size == 0)
		return;

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		const struct rte_mempool_cache *cache;
		cache = &mp->local_cache[lcore_id];
		if (cache->len > cache->flushthresh) {
			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
				lcore_id);
			rte_panic("MEMPOOL: invalid cache len\n");
		}
	}
}
/* check the consistency of mempool (size, cookies, ...) */
void
rte_mempool_audit(struct rte_mempool *mp)
{
	mempool_audit_cache(mp);
	mempool_audit_cookies(mp);

	/* For case where mempool DEBUG is not set, and cache size is 0 */
	RTE_SET_USED(mp);
}
/* dump the status of the mempool on the console */
void
rte_mempool_dump(FILE *f, struct rte_mempool *mp)
{
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	struct rte_mempool_debug_stats sum;
	unsigned lcore_id;
#endif
	struct rte_mempool_memhdr *memhdr;
	unsigned common_count;
	unsigned cache_count;
	size_t mem_len = 0;

	RTE_ASSERT(f != NULL);
	RTE_ASSERT(mp != NULL);

	fprintf(f, "mempool <%s>@%p\n", mp->name, mp);
	fprintf(f, "  flags=%x\n", mp->flags);
	fprintf(f, "  pool=%p\n", mp->pool_data);
	fprintf(f, "  phys_addr=0x%" PRIx64 "\n", mp->mz->phys_addr);
	fprintf(f, "  nb_mem_chunks=%u\n", mp->nb_mem_chunks);
	fprintf(f, "  size=%"PRIu32"\n", mp->size);
	fprintf(f, "  populated_size=%"PRIu32"\n", mp->populated_size);
	fprintf(f, "  header_size=%"PRIu32"\n", mp->header_size);
	fprintf(f, "  elt_size=%"PRIu32"\n", mp->elt_size);
	fprintf(f, "  trailer_size=%"PRIu32"\n", mp->trailer_size);
	fprintf(f, "  total_obj_size=%"PRIu32"\n",
		mp->header_size + mp->elt_size + mp->trailer_size);
	fprintf(f, "  private_data_size=%"PRIu32"\n", mp->private_data_size);

	STAILQ_FOREACH(memhdr, &mp->mem_list, next)
		mem_len += memhdr->len;
	if (mem_len != 0) {
		fprintf(f, "  avg bytes/object=%#Lf\n",
			(long double)mem_len / mp->size);
	}

	cache_count = rte_mempool_dump_cache(f, mp);
	common_count = rte_mempool_ops_get_count(mp);
	if ((cache_count + common_count) > mp->size)
		common_count = mp->size - cache_count;
	fprintf(f, "  common_pool_count=%u\n", common_count);

	/* sum and dump statistics */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	memset(&sum, 0, sizeof(sum));
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		sum.put_bulk += mp->stats[lcore_id].put_bulk;
		sum.put_objs += mp->stats[lcore_id].put_objs;
		sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk;
		sum.get_success_objs += mp->stats[lcore_id].get_success_objs;
		sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk;
		sum.get_fail_objs += mp->stats[lcore_id].get_fail_objs;
	}
	fprintf(f, "  stats:\n");
	fprintf(f, "    put_bulk=%"PRIu64"\n", sum.put_bulk);
	fprintf(f, "    put_objs=%"PRIu64"\n", sum.put_objs);
	fprintf(f, "    get_success_bulk=%"PRIu64"\n", sum.get_success_bulk);
	fprintf(f, "    get_success_objs=%"PRIu64"\n", sum.get_success_objs);
	fprintf(f, "    get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk);
	fprintf(f, "    get_fail_objs=%"PRIu64"\n", sum.get_fail_objs);
#else
	fprintf(f, "  no statistics available\n");
#endif

	rte_mempool_audit(mp);
}
/* dump the status of all mempools on the console */
void
rte_mempool_list_dump(FILE *f)
{
	struct rte_mempool *mp = NULL;
	struct rte_tailq_entry *te;
	struct rte_mempool_list *mempool_list;

	mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);

	rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK);

	TAILQ_FOREACH(te, mempool_list, next) {
		mp = (struct rte_mempool *) te->data;
		rte_mempool_dump(f, mp);
	}

	rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK);
}
/* search a mempool from its name */
struct rte_mempool *
rte_mempool_lookup(const char *name)
{
	struct rte_mempool *mp = NULL;
	struct rte_tailq_entry *te;
	struct rte_mempool_list *mempool_list;

	mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);

	rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK);

	TAILQ_FOREACH(te, mempool_list, next) {
		mp = (struct rte_mempool *) te->data;
		if (strncmp(name, mp->name, RTE_MEMPOOL_NAMESIZE) == 0)
			break;
	}

	rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK);

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return mp;
}
/* call "func" with "arg" for each mempool registered in the system */
void rte_mempool_walk(void (*func)(struct rte_mempool *, void *),
		      void *arg)
{
	struct rte_tailq_entry *te = NULL;
	struct rte_mempool_list *mempool_list;
	void *tmp_te;

	mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);

	rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK);

	TAILQ_FOREACH_SAFE(te, mempool_list, next, tmp_te) {
		(*func)((struct rte_mempool *) te->data, arg);
	}

	rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK);
}