/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <inttypes.h>
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/queue.h>

#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_debug.h>
#include <rte_common.h>
#include <rte_spinlock.h>

#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
#include "malloc_elem.h"
#include "malloc_heap.h"

/*
 * If debugging is enabled, freed memory is set to a poison value
 * to catch buggy programs. Otherwise, freed memory is set to zero
 * to avoid having to zero it in zmalloc.
 */
#ifdef RTE_MALLOC_DEBUG
#define MALLOC_POISON	0x6b
#else
#define MALLOC_POISON	0
#endif

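/*
 * Illustrative example: with RTE_MALLOC_DEBUG enabled, a buggy read of
 * freed memory sees 0x6b fill bytes, so stale pointers and lengths tend
 * to surface as recognizable 0x6b6b6b6b... patterns instead of silently
 * reading zeroed memory.
 */
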
size_t
malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
{
	void *cur_page, *contig_seg_start, *page_end, *cur_seg_end;
	void *data_start, *data_end;
	rte_iova_t expected_iova;
	struct rte_memseg *ms;
	size_t page_sz, cur, max;

	page_sz = (size_t)elem->msl->page_sz;
	data_start = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
	data_end = RTE_PTR_ADD(elem, elem->size - MALLOC_ELEM_TRAILER_LEN);
	/* segment must start after header and with specified alignment */
	contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);

	/* return if aligned address is already out of malloc element */
	if (contig_seg_start > data_end)
		return 0;

	/* if we're in IOVA as VA mode, or if we're in legacy mode with
	 * hugepages, all elements are IOVA-contiguous. however, we can only
	 * make these assumptions about internal memory - externally allocated
	 * segments have to be checked.
	 */
	if (!elem->msl->external &&
			(rte_eal_iova_mode() == RTE_IOVA_VA ||
			 (internal_config.legacy_mem &&
			  rte_eal_has_hugepages())))
		return RTE_PTR_DIFF(data_end, contig_seg_start);

	cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz);
	ms = rte_mem_virt2memseg(cur_page, elem->msl);

	/* do first iteration outside the loop */
	page_end = RTE_PTR_ADD(cur_page, page_sz);
	cur_seg_end = RTE_MIN(page_end, data_end);
	cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start) -
			MALLOC_ELEM_TRAILER_LEN;
	max = cur;
	expected_iova = ms->iova + page_sz;
	/* memsegs are contiguous in memory */
	ms++;

	cur_page = RTE_PTR_ADD(cur_page, page_sz);

	while (cur_page < data_end) {
		page_end = RTE_PTR_ADD(cur_page, page_sz);
		cur_seg_end = RTE_MIN(page_end, data_end);

		/* reset start of contiguous segment if unexpected iova */
		if (ms->iova != expected_iova) {
			/* next contiguous segment must start at specified
			 * alignment.
			 */
			contig_seg_start = RTE_PTR_ALIGN(cur_page, align);
			/* new segment start may be on a different page, so find
			 * the page and skip to next iteration to make sure
			 * we're not blowing past data end.
			 */
			ms = rte_mem_virt2memseg(contig_seg_start, elem->msl);
			cur_page = ms->addr;
			/* don't trigger another recalculation */
			expected_iova = ms->iova;
			continue;
		}
		/* cur_seg_end ends on a page boundary or on data end. if we're
		 * looking at data end, then malloc trailer is already included
		 * in the calculations. if we're looking at page end, then we
		 * know there's more data past this page and thus there's space
		 * for malloc element trailer, so don't count it here.
		 */
		cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start);
		/* update max if cur value is bigger */
		if (cur > max)
			max = cur;

		/* move to next page */
		cur_page = page_end;
		expected_iova = ms->iova + page_sz;
		/* memsegs are contiguous in memory */
		ms++;
	}

	return max;
}

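/*
 * Worked example for the scan above (illustrative): suppose the data
 * area spans three pages and the second page's IOVA does not follow the
 * first's. The pre-loop iteration measures the run ending at the first
 * page boundary; once ms->iova != expected_iova, the segment start is
 * re-aligned on the second page and the remaining loop iterations
 * measure a run covering pages two and three. The return value is the
 * larger of the two runs, not their sum.
 */
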
/*
 * Initialize a general malloc_elem header structure
 */
void
malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap,
		struct rte_memseg_list *msl, size_t size,
		struct malloc_elem *orig_elem, size_t orig_size)
{
	elem->heap = heap;
	elem->msl = msl;
	elem->prev = NULL;
	elem->next = NULL;
	memset(&elem->free_list, 0, sizeof(elem->free_list));
	elem->state = ELEM_FREE;
	elem->size = size;
	elem->pad = 0;
	elem->orig_elem = orig_elem;
	elem->orig_size = orig_size;
	set_header(elem);
	set_trailer(elem);
}

void
malloc_elem_insert(struct malloc_elem *elem)
{
	struct malloc_elem *prev_elem, *next_elem;
	struct malloc_heap *heap = elem->heap;

	/* first and last elements must be both NULL or both non-NULL */
	if ((heap->first == NULL) != (heap->last == NULL)) {
		RTE_LOG(ERR, EAL, "Heap is probably corrupt\n");
		return;
	}

	if (heap->first == NULL && heap->last == NULL) {
		/* if empty heap */
		heap->first = elem;
		heap->last = elem;
		prev_elem = NULL;
		next_elem = NULL;
	} else if (elem < heap->first) {
		/* if lower than start */
		prev_elem = NULL;
		next_elem = heap->first;
		heap->first = elem;
	} else if (elem > heap->last) {
		/* if higher than end */
		prev_elem = heap->last;
		next_elem = NULL;
		heap->last = elem;
	} else {
		/* the new memory is somewhere between start and end */
		uint64_t dist_from_start, dist_from_end;

		dist_from_end = RTE_PTR_DIFF(heap->last, elem);
		dist_from_start = RTE_PTR_DIFF(elem, heap->first);

		/* check which is closer, and find closest list entries */
		if (dist_from_start < dist_from_end) {
			prev_elem = heap->first;
			while (prev_elem->next < elem)
				prev_elem = prev_elem->next;
			next_elem = prev_elem->next;
		} else {
			next_elem = heap->last;
			while (next_elem->prev > elem)
				next_elem = next_elem->prev;
			prev_elem = next_elem->prev;
		}
	}

	/* insert new element */
	elem->prev = prev_elem;
	elem->next = next_elem;
	if (prev_elem)
		prev_elem->next = elem;
	if (next_elem)
		next_elem->prev = elem;
}

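/*
 * Note: the element list is kept sorted by address, so locating the
 * neighbours is a linear walk. Walking from whichever end of the heap
 * is closer to the new element roughly halves the expected number of
 * steps, but the worst case is still linear in the element count.
 */
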
/*
 * Attempt to find enough physically contiguous memory in this block to store
 * our data. Assume that element has at least enough space to fit in the data,
 * so we just check the page addresses.
 */
static bool
elem_check_phys_contig(const struct rte_memseg_list *msl,
		void *start, size_t size)
{
	return eal_memalloc_is_contig(msl, start, size);
}

/*
 * calculate the starting point of where data of the requested size
 * and alignment would fit in the current element. If the data doesn't
 * fit, return NULL.
 */
static void *
elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound, bool contig)
{
	size_t elem_size = elem->size;

	/*
	 * we're allocating from the end, so adjust the size of element by
	 * alignment size.
	 */
	while (elem_size >= size) {
		const size_t bmask = ~(bound - 1);
		uintptr_t end_pt = (uintptr_t)elem +
				elem_size - MALLOC_ELEM_TRAILER_LEN;
		uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
				align);
		uintptr_t new_elem_start;

		/* check boundary */
		if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
			end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
			new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
					align);
			end_pt = new_data_start + size;

			if (((end_pt - 1) & bmask) != (new_data_start & bmask))
				return NULL;
		}

		new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;

		/* if the new start point is before the existing start,
		 * it won't fit
		 */
		if (new_elem_start < (uintptr_t)elem)
			return NULL;

		if (contig) {
			size_t new_data_size = end_pt - new_data_start;

			/*
			 * if physical contiguousness was requested and we
			 * couldn't fit all data into one physically contiguous
			 * block, try again with lower addresses.
			 */
			if (!elem_check_phys_contig(elem->msl,
					(void *)new_data_start,
					new_data_size)) {
				elem_size -= align;
				continue;
			}
		}
		return (void *)new_elem_start;
	}
	return NULL;
}

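/*
 * Worked example (illustrative numbers; assume a 64-byte header and a
 * zero-length trailer, i.e. a non-debug build): for an element at
 * 0x100000 with elem_size = 0x10000, a request of size = 0x1000 and
 * align = 0x400 gives end_pt = 0x110000, new_data_start =
 * RTE_ALIGN_FLOOR(0x10F000, 0x400) = 0x10F000, and a candidate header
 * at 0x10EFC0. Allocating from the end like this leaves the leftover
 * free space as a single element at the low-address side.
 */
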
/*
 * use elem_start_pt to determine if we can meet the size and
 * alignment request from the current element
 */
int
malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound, bool contig)
{
	return elem_start_pt(elem, size, align, bound, contig) != NULL;
}

/*
 * split an existing element into two smaller elements at the given
 * split_pt parameter.
 */
static void
split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
{
	struct malloc_elem *next_elem = elem->next;
	const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
	const size_t new_elem_size = elem->size - old_elem_size;

	malloc_elem_init(split_pt, elem->heap, elem->msl, new_elem_size,
			elem->orig_elem, elem->orig_size);
	split_pt->prev = elem;
	split_pt->next = next_elem;
	if (next_elem)
		next_elem->prev = split_pt;
	else
		elem->heap->last = split_pt;
	elem->next = split_pt;
	elem->size = old_elem_size;
	set_trailer(elem);
	if (elem->pad) {
		/* update the size of the inner element behind the padding */
		elem = RTE_PTR_ADD(elem, elem->pad);
		elem->size = old_elem_size - elem->pad;
	}
}

/*
 * our malloc heap is a doubly linked list, so doubly remove our element.
 */
static void __rte_unused
remove_elem(struct malloc_elem *elem)
{
	struct malloc_elem *next, *prev;
	next = elem->next;
	prev = elem->prev;

	if (next)
		next->prev = prev;
	else
		elem->heap->last = prev;
	if (prev)
		prev->next = next;
	else
		elem->heap->first = next;

	elem->prev = NULL;
	elem->next = NULL;
}

static int
next_elem_is_adjacent(struct malloc_elem *elem)
{
	return elem->next == RTE_PTR_ADD(elem, elem->size) &&
			elem->next->msl == elem->msl &&
			(!internal_config.match_allocations ||
			 elem->orig_elem == elem->next->orig_elem);
}

static int
prev_elem_is_adjacent(struct malloc_elem *elem)
{
	return elem == RTE_PTR_ADD(elem->prev, elem->prev->size) &&
			elem->prev->msl == elem->msl &&
			(!internal_config.match_allocations ||
			 elem->orig_elem == elem->prev->orig_elem);
}

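/*
 * The orig_elem comparison above matters with the EAL option
 * --match-allocations, under which memory is returned to the heap in
 * the same chunks in which it was originally allocated; elements that
 * came from different original allocations must then never be merged.
 */
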
/*
 * Given an element size, compute its freelist index.
 * We free an element into the freelist containing similarly-sized elements.
 * We try to allocate elements starting with the freelist containing
 * similarly-sized elements, and if necessary, we search freelists
 * containing larger elements.
 *
 * Example element size ranges for a heap with five free lists:
 *   heap->free_head[0] - (0   , 2^8]
 *   heap->free_head[1] - (2^8 , 2^10]
 *   heap->free_head[2] - (2^10, 2^12]
 *   heap->free_head[3] - (2^12, 2^14]
 *   heap->free_head[4] - (2^14, MAX_SIZE]
 */
size_t
malloc_elem_free_list_index(size_t size)
{
#define MALLOC_MINSIZE_LOG2   8
#define MALLOC_LOG2_INCREMENT 2

	size_t log2;
	size_t index;

	if (size <= (1UL << MALLOC_MINSIZE_LOG2))
		return 0;

	/* Find next power of 2 >= size. */
	log2 = sizeof(size) * 8 - __builtin_clzl(size - 1);

	/* Compute freelist index, based on log2(size). */
	index = (log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) /
			MALLOC_LOG2_INCREMENT;

	return index <= RTE_HEAP_NUM_FREELISTS - 1 ?
			index : RTE_HEAP_NUM_FREELISTS - 1;
}

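/*
 * Worked example: size = 3000. The next power of two >= 3000 is 4096,
 * so log2 = 12 and index = (12 - 8 + 2 - 1) / 2 = 2, i.e. the
 * (2^10, 2^12] list - and 3000 does fall in (1024, 4096].
 */
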
/*
 * Add the specified element to its heap's free list.
 */
void
malloc_elem_free_list_insert(struct malloc_elem *elem)
{
	size_t idx;

	idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN);
	elem->state = ELEM_FREE;
	LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list);
}

/*
 * Remove the specified element from its heap's free list.
 */
void
malloc_elem_free_list_remove(struct malloc_elem *elem)
{
	LIST_REMOVE(elem, free_list);
}

/*
 * reserve a block of data in an existing malloc_elem. If the malloc_elem
 * is much larger than the data block requested, we split the element in two.
 * This function is only called from malloc_heap_alloc so parameter checking
 * is not done here, as it has already been done there.
 */
struct malloc_elem *
malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound, bool contig)
{
	struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound,
			contig);
	const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
	const size_t trailer_size = elem->size - old_elem_size - size -
			MALLOC_ELEM_OVERHEAD;

	malloc_elem_free_list_remove(elem);

	if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* split it, too much free space after elem */
		struct malloc_elem *new_free_elem =
				RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD);

		split_elem(elem, new_free_elem);
		malloc_elem_free_list_insert(new_free_elem);

		if (elem == elem->heap->last)
			elem->heap->last = new_free_elem;
	}

	if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* don't split it, pad the element instead */
		elem->state = ELEM_BUSY;
		elem->pad = old_elem_size;

		/* put a dummy header in padding, to point to real element header */
		if (elem->pad > 0) { /* pad will be at least 64 bytes, as everything
				      * is cache-line aligned */
			new_elem->pad = elem->pad;
			new_elem->state = ELEM_PAD;
			new_elem->size = elem->size - elem->pad;
			set_header(new_elem);
		}

		return new_elem;
	}

	/* we are going to split the element in two. The original element
	 * remains free, and the new element is the one allocated.
	 * Re-insert original element, in case its new size makes it
	 * belong on a different list.
	 */
	split_elem(elem, new_elem);
	new_elem->state = ELEM_BUSY;
	malloc_elem_free_list_insert(elem);

	return new_elem;
}

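/*
 * To summarize the cases above: free space left after the allocated
 * block is split off and re-inserted as its own free element; free
 * space left before it that is too small to carry a header and trailer
 * (under MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) becomes padding,
 * recorded in a dummy ELEM_PAD header just in front of the returned
 * data so that a later free can step back to the real ELEM_BUSY
 * header; larger space before it is split off and stays free.
 */
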
/*
 * join two struct malloc_elem together. elem1 and elem2 must
 * be contiguous in memory.
 */
static inline void
join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
{
	struct malloc_elem *next = elem2->next;
	elem1->size += elem2->size;
	if (next)
		next->prev = elem1;
	else
		elem1->heap->last = elem1;
	elem1->next = next;
	if (elem1->pad) {
		struct malloc_elem *inner = RTE_PTR_ADD(elem1, elem1->pad);
		inner->size = elem1->size - elem1->pad;
	}
}

struct malloc_elem *
malloc_elem_join_adjacent_free(struct malloc_elem *elem)
{
	/*
	 * check if next element exists, is adjacent and is free; if so, join
	 * with it after removing it from the free list.
	 */
	if (elem->next != NULL && elem->next->state == ELEM_FREE &&
			next_elem_is_adjacent(elem)) {
		void *erase;
		size_t erase_len;

		/* we will want to erase the trailer and header */
		erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN);
		erase_len = MALLOC_ELEM_OVERHEAD + elem->next->pad;

		/* remove from free list, join to this one */
		malloc_elem_free_list_remove(elem->next);
		join_elem(elem, elem->next);

		/* erase header, trailer and pad */
		memset(erase, MALLOC_POISON, erase_len);
	}

	/*
	 * check if prev element exists, is adjacent and is free; if so, join
	 * with it after removing it from the free list.
	 */
	if (elem->prev != NULL && elem->prev->state == ELEM_FREE &&
			prev_elem_is_adjacent(elem)) {
		struct malloc_elem *new_elem;
		void *erase;
		size_t erase_len;

		/* we will want to erase trailer and header */
		erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN);
		erase_len = MALLOC_ELEM_OVERHEAD + elem->pad;

		/* remove from free list, join to this one */
		malloc_elem_free_list_remove(elem->prev);

		new_elem = elem->prev;
		join_elem(new_elem, elem);

		/* erase header, trailer and pad */
		memset(erase, MALLOC_POISON, erase_len);

		elem = new_elem;
	}

	return elem;
}

/*
 * free a malloc_elem block by adding it to the free list. If the
 * blocks either immediately before or immediately after the newly freed
 * block are also free, the blocks are merged together.
 */
struct malloc_elem *
malloc_elem_free(struct malloc_elem *elem)
{
	void *ptr;
	size_t data_len;

	ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
	data_len = elem->size - MALLOC_ELEM_OVERHEAD;

	elem = malloc_elem_join_adjacent_free(elem);

	malloc_elem_free_list_insert(elem);

	elem->pad = 0;

	/* decrease heap's count of allocated elements */
	elem->heap->alloc_count--;

	/* poison memory */
	memset(ptr, MALLOC_POISON, data_len);

	return elem;
}

/* assume all checks were already done */
void
malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len)
{
	struct malloc_elem *hide_start, *hide_end, *prev, *next;
	size_t len_before, len_after;

	hide_start = start;
	hide_end = RTE_PTR_ADD(start, len);

	prev = elem->prev;
	next = elem->next;

	/* we cannot do anything with non-adjacent elements */
	if (next && next_elem_is_adjacent(elem)) {
		len_after = RTE_PTR_DIFF(next, hide_end);
		if (len_after >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
			/* split after */
			split_elem(elem, hide_end);

			malloc_elem_free_list_insert(hide_end);
		} else if (len_after > 0) {
			RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
			return;
		}
	}

	/* we cannot do anything with non-adjacent elements */
	if (prev && prev_elem_is_adjacent(elem)) {
		len_before = RTE_PTR_DIFF(hide_start, elem);
		if (len_before >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
			/* split before */
			split_elem(elem, hide_start);

			prev = elem;
			elem = hide_start;

			malloc_elem_free_list_insert(prev);
		} else if (len_before > 0) {
			RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
			return;
		}
	}

	remove_elem(elem);
}

/*
 * attempt to resize a malloc_elem by expanding into any free space
 * immediately after it in memory.
 */
int
malloc_elem_resize(struct malloc_elem *elem, size_t size)
{
	const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD;

	/* if we request a smaller size, then always return ok */
	if (elem->size >= new_size)
		return 0;

	/* check if there is a next element, it's free and adjacent */
	if (!elem->next || elem->next->state != ELEM_FREE ||
			!next_elem_is_adjacent(elem))
		return -1;
	if (elem->size + elem->next->size < new_size)
		return -1;

	/* we now know the element fits, so remove from free list,
	 * join the two
	 */
	malloc_elem_free_list_remove(elem->next);
	join_elem(elem, elem->next);

	if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
		/* now we have a big block together. Let's cut it down a bit,
		 * by splitting */
		struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size);
		split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE);
		split_elem(elem, split_pt);
		malloc_elem_free_list_insert(split_pt);
	}
	return 0;
}

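/*
 * This is the in-place grow path used by realloc-style callers: when
 * the following element is free and adjacent, the two are joined and
 * any excess beyond the requested size is split back onto the free
 * list; when it is not, the caller has to fall back to
 * allocate + copy + free.
 */
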
static inline const char *
elem_state_to_str(enum elem_state state)
{
	switch (state) {
	case ELEM_PAD:
		return "PAD";
	case ELEM_BUSY:
		return "BUSY";
	case ELEM_FREE:
		return "FREE";
	}
	return "ERROR";
}

void
malloc_elem_dump(const struct malloc_elem *elem, FILE *f)
{
	fprintf(f, "Malloc element at %p (%s)\n", elem,
			elem_state_to_str(elem->state));
	fprintf(f, "  len: 0x%zx pad: 0x%" PRIx32 "\n", elem->size, elem->pad);
	fprintf(f, "  prev: %p next: %p\n", elem->prev, elem->next);
}