//===-- sanitizer_allocator.h -----------------------------------*- C++ -*-===//
// The LLVM Compiler Infrastructure
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//===----------------------------------------------------------------------===//
// Specialized memory allocator for ThreadSanitizer, MemorySanitizer, etc.
//===----------------------------------------------------------------------===//

#ifndef SANITIZER_ALLOCATOR_H
#define SANITIZER_ALLOCATOR_H

#include "sanitizer_internal_defs.h"
#include "sanitizer_common.h"
#include "sanitizer_libc.h"
#include "sanitizer_list.h"
#include "sanitizer_mutex.h"
#include "sanitizer_lfstack.h"

namespace __sanitizer {

// Prints error message and kills the program.
void NORETURN ReportAllocatorCannotReturnNull();

// SizeClassMap maps allocation sizes into size classes and back.
// Class 0 corresponds to size 0.
// Classes 1 - 16 correspond to sizes 16 to 256 (size = class_id * 16).
// Next 4 classes: 256 + i * 64 (i = 1 to 4).
// Next 4 classes: 512 + i * 128 (i = 1 to 4).
// ...
// Next 4 classes: 2^k + i * 2^(k-2) (i = 1 to 4).
// Last class corresponds to kMaxSize = 1 << kMaxSizeLog.
//
// This structure of the size class map gives us:
//   - Efficient table-free class-to-size and size-to-class functions.
//   - Difference between two consecutive size classes is between 14% and 25%.
//
// This class also gives a hint to a thread-caching allocator about the number
// of chunks that need to be cached per-thread:
//  - kMaxNumCached is the maximal number of chunks per size class.
//  - (1 << kMaxBytesCachedLog) is the maximal number of bytes per size class.
//
// Part of output of SizeClassMap::Print():
// c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0
// c01 => s: 16 diff: +16 00% l 4 cached: 256 4096; id 1
// c02 => s: 32 diff: +16 100% l 5 cached: 256 8192; id 2
// c03 => s: 48 diff: +16 50% l 5 cached: 256 12288; id 3
// c04 => s: 64 diff: +16 33% l 6 cached: 256 16384; id 4
// c05 => s: 80 diff: +16 25% l 6 cached: 256 20480; id 5
// c06 => s: 96 diff: +16 20% l 6 cached: 256 24576; id 6
// c07 => s: 112 diff: +16 16% l 6 cached: 256 28672; id 7
//
// c08 => s: 128 diff: +16 14% l 7 cached: 256 32768; id 8
// c09 => s: 144 diff: +16 12% l 7 cached: 256 36864; id 9
// c10 => s: 160 diff: +16 11% l 7 cached: 256 40960; id 10
// c11 => s: 176 diff: +16 10% l 7 cached: 256 45056; id 11
// c12 => s: 192 diff: +16 09% l 7 cached: 256 49152; id 12
// c13 => s: 208 diff: +16 08% l 7 cached: 256 53248; id 13
// c14 => s: 224 diff: +16 07% l 7 cached: 256 57344; id 14
// c15 => s: 240 diff: +16 07% l 7 cached: 256 61440; id 15
//
// c16 => s: 256 diff: +16 06% l 8 cached: 256 65536; id 16
// c17 => s: 320 diff: +64 25% l 8 cached: 204 65280; id 17
// c18 => s: 384 diff: +64 20% l 8 cached: 170 65280; id 18
// c19 => s: 448 diff: +64 16% l 8 cached: 146 65408; id 19
//
// c20 => s: 512 diff: +64 14% l 9 cached: 128 65536; id 20
// c21 => s: 640 diff: +128 25% l 9 cached: 102 65280; id 21
// c22 => s: 768 diff: +128 20% l 9 cached: 85 65280; id 22
// c23 => s: 896 diff: +128 16% l 9 cached: 73 65408; id 23
//
// c24 => s: 1024 diff: +128 14% l 10 cached: 64 65536; id 24
// c25 => s: 1280 diff: +256 25% l 10 cached: 51 65280; id 25
// c26 => s: 1536 diff: +256 20% l 10 cached: 42 64512; id 26
// c27 => s: 1792 diff: +256 16% l 10 cached: 36 64512; id 27
//
// ...
//
// c48 => s: 65536 diff: +8192 14% l 16 cached: 1 65536; id 48
// c49 => s: 81920 diff: +16384 25% l 16 cached: 1 81920; id 49
// c50 => s: 98304 diff: +16384 20% l 16 cached: 1 98304; id 50
// c51 => s: 114688 diff: +16384 16% l 16 cached: 1 114688; id 51
//
// c52 => s: 131072 diff: +16384 14% l 17 cached: 1 131072; id 52
template <uptr kMaxSizeLog, uptr kMaxNumCachedT, uptr kMaxBytesCachedLog>
class SizeClassMap {
  static const uptr kMinSizeLog = 4;
  static const uptr kMidSizeLog = kMinSizeLog + 4;
  static const uptr kMinSize = 1 << kMinSizeLog;
  static const uptr kMidSize = 1 << kMidSizeLog;
  static const uptr kMidClass = kMidSize / kMinSize;
  static const uptr S = 2;
  static const uptr M = (1 << S) - 1;

  static const uptr kMaxNumCached = kMaxNumCachedT;
  // We transfer chunks between central and thread-local free lists in batches.
  // For small size classes we allocate batches separately.
  // For large size classes we use one of the chunks to store the batch.
  struct TransferBatch {
    void *batch[kMaxNumCached];

  static const uptr kMaxSize = 1UL << kMaxSizeLog;
  static const uptr kNumClasses =
      kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1;
  COMPILER_CHECK(kNumClasses >= 32 && kNumClasses <= 256);
  static const uptr kNumClassesRounded =
      kNumClasses == 32 ? 32 :
      kNumClasses <= 64 ? 64 :
      kNumClasses <= 128 ? 128 : 256;

  static uptr Size(uptr class_id) {
    if (class_id <= kMidClass)
      return kMinSize * class_id;
    class_id -= kMidClass;
    uptr t = kMidSize << (class_id >> S);
    return t + (t >> S) * (class_id & M);

  static uptr ClassID(uptr size) {
    if (size <= kMidSize)
      return (size + kMinSize - 1) >> kMinSizeLog;
    if (size > kMaxSize) return 0;
    uptr l = MostSignificantSetBitIndex(size);
    uptr hbits = (size >> (l - S)) & M;
    uptr lbits = size & ((1 << (l - S)) - 1);
    uptr l1 = l - kMidSizeLog;
    return kMidClass + (l1 << S) + hbits + (lbits > 0);

  static uptr MaxCached(uptr class_id) {
    if (class_id == 0) return 0;
    uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
    return Max<uptr>(1, Min(kMaxNumCached, n));

  static void Print() {
    uptr total_cached = 0;
    for (uptr i = 0; i < kNumClasses; i++) {
      if (s >= kMidSize / 2 && (s & (s - 1)) == 0)
      uptr p = prev_s ? (d * 100 / prev_s) : 0;
      uptr l = s ? MostSignificantSetBitIndex(s) : 0;
      uptr cached = MaxCached(i) * s;
      Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
             "cached: %zd %zd; id %zd\n",
             i, Size(i), d, p, l, MaxCached(i), cached, ClassID(s));
      total_cached += cached;

    Printf("Total cached: %zd\n", total_cached);

  static bool SizeClassRequiresSeparateTransferBatch(uptr class_id) {
    return Size(class_id) < sizeof(TransferBatch) -
        sizeof(uptr) * (kMaxNumCached - MaxCached(class_id));

  static void Validate() {
    for (uptr c = 1; c < kNumClasses; c++) {
      // Printf("Validate: c%zd\n", c);
      CHECK_EQ(ClassID(s), c);
      if (c != kNumClasses - 1)
        CHECK_EQ(ClassID(s + 1), c + 1);
      CHECK_EQ(ClassID(s - 1), c);
        CHECK_GT(Size(c), Size(c-1));

    CHECK_EQ(ClassID(kMaxSize + 1), 0);

    for (uptr s = 1; s <= kMaxSize; s++) {
      // Printf("s%zd => c%zd\n", s, c);
      CHECK_LT(c, kNumClasses);
      CHECK_GE(Size(c), s);
        CHECK_LT(Size(c-1), s);

typedef SizeClassMap<17, 128, 16> DefaultSizeClassMap;
typedef SizeClassMap<17, 64, 14> CompactSizeClassMap;
template<class SizeClassAllocator> struct SizeClassAllocatorLocalCache;
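
// Illustrative sketch (not part of the original header): how the map is meant
// to be used. A size-class allocator rounds a request up to the size of its
// class, so Size(ClassID(s)) >= s for every s <= kMaxSize. For example, with
// DefaultSizeClassMap:
//   DefaultSizeClassMap::ClassID(48)  == 3,  Size(3)  == 48
//   DefaultSizeClassMap::ClassID(300) == 17, Size(17) == 320   (c17 above)
// ClassID() returns 0 for sizes above kMaxSize; such requests are not
// representable here and are handled by the secondary allocator instead.
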
// Memory allocator statistics
enum AllocatorStat {
  AllocatorStatAllocated,
  AllocatorStatMapped,
  AllocatorStatCount
};

typedef uptr AllocatorStatCounters[AllocatorStatCount];

// Per-thread stats, live in per-thread cache.
class AllocatorStats {
    internal_memset(this, 0, sizeof(*this));

  void InitLinkerInitialized() {}

  void Add(AllocatorStat i, uptr v) {
    v += atomic_load(&stats_[i], memory_order_relaxed);
    atomic_store(&stats_[i], v, memory_order_relaxed);

  void Sub(AllocatorStat i, uptr v) {
    v = atomic_load(&stats_[i], memory_order_relaxed) - v;
    atomic_store(&stats_[i], v, memory_order_relaxed);

  void Set(AllocatorStat i, uptr v) {
    atomic_store(&stats_[i], v, memory_order_relaxed);

  uptr Get(AllocatorStat i) const {
    return atomic_load(&stats_[i], memory_order_relaxed);

  friend class AllocatorGlobalStats;
  AllocatorStats *next_;
  AllocatorStats *prev_;
  atomic_uintptr_t stats_[AllocatorStatCount];

// Global stats, used for aggregation and querying.
class AllocatorGlobalStats : public AllocatorStats {
  void InitLinkerInitialized() {

    internal_memset(this, 0, sizeof(*this));
    InitLinkerInitialized();

  void Register(AllocatorStats *s) {
    SpinMutexLock l(&mu_);

  void Unregister(AllocatorStats *s) {
    SpinMutexLock l(&mu_);
    s->prev_->next_ = s->next_;
    s->next_->prev_ = s->prev_;
    for (int i = 0; i < AllocatorStatCount; i++)
      Add(AllocatorStat(i), s->Get(AllocatorStat(i)));

  void Get(AllocatorStatCounters s) const {
    internal_memset(s, 0, AllocatorStatCount * sizeof(uptr));
    SpinMutexLock l(&mu_);
    const AllocatorStats *stats = this;
      for (int i = 0; i < AllocatorStatCount; i++)
        s[i] += stats->Get(AllocatorStat(i));
      stats = stats->next_;

    // All stats must be non-negative.
    for (int i = 0; i < AllocatorStatCount; i++)
      s[i] = ((sptr)s[i]) >= 0 ? s[i] : 0;

  mutable SpinMutex mu_;

// Allocators call these callbacks on mmap/munmap.
struct NoOpMapUnmapCallback {
  void OnMap(uptr p, uptr size) const { }
  void OnUnmap(uptr p, uptr size) const { }

// Callback type for iterating over chunks.
typedef void (*ForEachChunkCallback)(uptr chunk, void *arg);

// SizeClassAllocator64 -- allocator for 64-bit address space.
//
// Space: a portion of address space of kSpaceSize bytes starting at
// a fixed address (kSpaceBeg). Both constants are powers of two and
// kSpaceBeg is kSpaceSize-aligned.
// At the beginning the entire space is mprotect-ed, then small parts of it
// are mapped on demand.
//
// Region: a part of Space dedicated to a single size class.
// There are kNumClasses Regions of equal size.
//
// UserChunk: a piece of memory returned to user.
// MetaChunk: kMetadataSize bytes of metadata associated with a UserChunk.
//
// A Region looks like this:
// UserChunk1 ... UserChunkN <gap> MetaChunkN ... MetaChunk1
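//
// Illustrative sketch (not part of the original header): for the chunk with
// index chunk_idx in the Region of class_id,
//   user chunk: region_beg + chunk_idx * chunk_size
//   metadata:   region_beg + kRegionSize - (chunk_idx + 1) * kMetadataSize
// where region_beg = kSpaceBeg + class_id * kRegionSize. This is exactly what
// GetBlockBegin() and GetMetaData() below compute.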
template <const uptr kSpaceBeg, const uptr kSpaceSize,
          const uptr kMetadataSize, class SizeClassMap,
          class MapUnmapCallback = NoOpMapUnmapCallback>
class SizeClassAllocator64 {
  typedef typename SizeClassMap::TransferBatch Batch;
  typedef SizeClassAllocator64<kSpaceBeg, kSpaceSize, kMetadataSize,
      SizeClassMap, MapUnmapCallback> ThisT;
  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;

             reinterpret_cast<uptr>(MmapNoAccess(kSpaceBeg, kSpaceSize)));
    MapWithCallback(kSpaceEnd, AdditionalSize());

  void MapWithCallback(uptr beg, uptr size) {
    CHECK_EQ(beg, reinterpret_cast<uptr>(MmapFixedOrDie(beg, size)));
    MapUnmapCallback().OnMap(beg, size);

  void UnmapWithCallback(uptr beg, uptr size) {
    MapUnmapCallback().OnUnmap(beg, size);
    UnmapOrDie(reinterpret_cast<void *>(beg), size);

  static bool CanAllocate(uptr size, uptr alignment) {
    return size <= SizeClassMap::kMaxSize &&
      alignment <= SizeClassMap::kMaxSize;

  NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
    CHECK_LT(class_id, kNumClasses);
    RegionInfo *region = GetRegionInfo(class_id);
    Batch *b = region->free_list.Pop();

      b = PopulateFreeList(stat, c, class_id, region);
    region->n_allocated += b->count;

  NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
    RegionInfo *region = GetRegionInfo(class_id);
    CHECK_GT(b->count, 0);
    region->free_list.Push(b);
    region->n_freed += b->count;

  static bool PointerIsMine(const void *p) {
    return reinterpret_cast<uptr>(p) / kSpaceSize == kSpaceBeg / kSpaceSize;

  static uptr GetSizeClass(const void *p) {
    return (reinterpret_cast<uptr>(p) / kRegionSize) % kNumClassesRounded;

  void *GetBlockBegin(const void *p) {
    uptr class_id = GetSizeClass(p);
    uptr size = SizeClassMap::Size(class_id);
    if (!size) return nullptr;
    uptr chunk_idx = GetChunkIdx((uptr)p, size);
    uptr reg_beg = (uptr)p & ~(kRegionSize - 1);
    uptr beg = chunk_idx * size;
    uptr next_beg = beg + size;
    if (class_id >= kNumClasses) return nullptr;
    RegionInfo *region = GetRegionInfo(class_id);
    if (region->mapped_user >= next_beg)
      return reinterpret_cast<void*>(reg_beg + beg);

  static uptr GetActuallyAllocatedSize(void *p) {
    CHECK(PointerIsMine(p));
    return SizeClassMap::Size(GetSizeClass(p));

  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }

  void *GetMetaData(const void *p) {
    uptr class_id = GetSizeClass(p);
    uptr size = SizeClassMap::Size(class_id);
    uptr chunk_idx = GetChunkIdx(reinterpret_cast<uptr>(p), size);
    return reinterpret_cast<void*>(kSpaceBeg + (kRegionSize * (class_id + 1)) -
                                   (1 + chunk_idx) * kMetadataSize);

  uptr TotalMemoryUsed() {
    for (uptr i = 0; i < kNumClasses; i++)
      res += GetRegionInfo(i)->allocated_user;

  void TestOnlyUnmap() {
    UnmapWithCallback(kSpaceBeg, kSpaceSize + AdditionalSize());

    uptr total_mapped = 0;
    uptr n_allocated = 0;
    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
      RegionInfo *region = GetRegionInfo(class_id);
      total_mapped += region->mapped_user;
      n_allocated += region->n_allocated;
      n_freed += region->n_freed;

    Printf("Stats: SizeClassAllocator64: %zdM mapped in %zd allocations; "
           total_mapped >> 20, n_allocated, n_allocated - n_freed);
    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
      RegionInfo *region = GetRegionInfo(class_id);
      if (region->mapped_user == 0) continue;
      Printf("  %02zd (%zd): total: %zd K allocs: %zd remains: %zd\n",
             SizeClassMap::Size(class_id),
             region->mapped_user >> 10,
             region->n_allocated - region->n_freed);

  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
  // introspection API.
    for (uptr i = 0; i < kNumClasses; i++) {
      GetRegionInfo(i)->mutex.Lock();

    for (int i = (int)kNumClasses - 1; i >= 0; i--) {
      GetRegionInfo(i)->mutex.Unlock();

  // Iterate over all existing chunks.
  // The allocator must be locked when calling this function.
  void ForEachChunk(ForEachChunkCallback callback, void *arg) {
    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
      RegionInfo *region = GetRegionInfo(class_id);
      uptr chunk_size = SizeClassMap::Size(class_id);
      uptr region_beg = kSpaceBeg + class_id * kRegionSize;
      for (uptr chunk = region_beg;
           chunk < region_beg + region->allocated_user;
           chunk += chunk_size) {
        // Too slow: CHECK_EQ((void *)chunk, GetBlockBegin((void *)chunk));
        callback(chunk, arg);

  static uptr AdditionalSize() {
    return RoundUpTo(sizeof(RegionInfo) * kNumClassesRounded,
                     GetPageSizeCached());

  typedef SizeClassMap SizeClassMapT;
  static const uptr kNumClasses = SizeClassMap::kNumClasses;
  static const uptr kNumClassesRounded = SizeClassMap::kNumClassesRounded;

  static const uptr kRegionSize = kSpaceSize / kNumClassesRounded;
  static const uptr kSpaceEnd = kSpaceBeg + kSpaceSize;
  COMPILER_CHECK(kSpaceBeg % kSpaceSize == 0);
  // kRegionSize must be >= 2^32.
  COMPILER_CHECK((kRegionSize) >= (1ULL << (SANITIZER_WORDSIZE / 2)));
  // Populate the free list with at most this number of bytes at once
  // or with one element if its size is greater.
  static const uptr kPopulateSize = 1 << 14;
  // Call mmap for user memory with at least this size.
  static const uptr kUserMapSize = 1 << 16;
  // Call mmap for metadata memory with at least this size.
  static const uptr kMetaMapSize = 1 << 16;

    LFStack<Batch> free_list;
    uptr allocated_user;  // Bytes allocated for user memory.
    uptr allocated_meta;  // Bytes allocated for metadata.
    uptr mapped_user;  // Bytes mapped for user memory.
    uptr mapped_meta;  // Bytes mapped for metadata.
    uptr n_allocated, n_freed;  // Just stats.

  COMPILER_CHECK(sizeof(RegionInfo) >= kCacheLineSize);

  RegionInfo *GetRegionInfo(uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    RegionInfo *regions = reinterpret_cast<RegionInfo*>(kSpaceBeg + kSpaceSize);
    return &regions[class_id];

  static uptr GetChunkIdx(uptr chunk, uptr size) {
    uptr offset = chunk % kRegionSize;
    // Here we divide by a non-constant. This is costly.
    // size always fits into 32-bits. If the offset fits too, use 32-bit div.
    if (offset >> (SANITIZER_WORDSIZE / 2))
      return offset / size;
    return (u32)offset / (u32)size;

  NOINLINE Batch* PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
                                   uptr class_id, RegionInfo *region) {
    BlockingMutexLock l(&region->mutex);
    Batch *b = region->free_list.Pop();

    uptr size = SizeClassMap::Size(class_id);
    uptr count = size < kPopulateSize ? SizeClassMap::MaxCached(class_id) : 1;
    uptr beg_idx = region->allocated_user;
    uptr end_idx = beg_idx + count * size;
    uptr region_beg = kSpaceBeg + kRegionSize * class_id;
    if (end_idx + size > region->mapped_user) {
      // Do the mmap for the user memory.
      uptr map_size = kUserMapSize;
      while (end_idx + size > region->mapped_user + map_size)
        map_size += kUserMapSize;
      CHECK_GE(region->mapped_user + map_size, end_idx);
      MapWithCallback(region_beg + region->mapped_user, map_size);
      stat->Add(AllocatorStatMapped, map_size);
      region->mapped_user += map_size;

    uptr total_count = (region->mapped_user - beg_idx - size)
        / size / count * count;
    region->allocated_meta += total_count * kMetadataSize;
    if (region->allocated_meta > region->mapped_meta) {
      uptr map_size = kMetaMapSize;
      while (region->allocated_meta > region->mapped_meta + map_size)
        map_size += kMetaMapSize;
      // Do the mmap for the metadata.
      CHECK_GE(region->mapped_meta + map_size, region->allocated_meta);
      MapWithCallback(region_beg + kRegionSize -
                      region->mapped_meta - map_size, map_size);
      region->mapped_meta += map_size;

    CHECK_LE(region->allocated_meta, region->mapped_meta);
    if (region->mapped_user + region->mapped_meta > kRegionSize) {
      Printf("%s: Out of memory. Dying. ", SanitizerToolName);
      Printf("The process has exhausted %zuMB for size class %zu.\n",
             kRegionSize / 1024 / 1024, size);

    if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
      b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));

      b = (Batch*)(region_beg + beg_idx);
    for (uptr i = 0; i < count; i++)
      b->batch[i] = (void*)(region_beg + beg_idx + i * size);
    region->allocated_user += count * size;
    CHECK_LE(region->allocated_user, region->mapped_user);
    beg_idx += count * size;
    if (beg_idx + count * size + size > region->mapped_user)

    CHECK_GT(b->count, 0);
    region->free_list.Push(b);

// Maps integers in range [0, kSize) to u8 values.
  void TestOnlyInit() {
    internal_memset(map_, 0, sizeof(map_));

  void set(uptr idx, u8 val) {
    CHECK_LT(idx, kSize);
    CHECK_EQ(0U, map_[idx]);

  u8 operator[] (uptr idx) {
    CHECK_LT(idx, kSize);
    // FIXME: CHECK may be too expensive here.

// TwoLevelByteMap maps integers in range [0, kSize1*kSize2) to u8 values.
// It is implemented as a two-dimensional array: array of kSize1 pointers
// to kSize2-byte arrays. The secondary arrays are mmaped on demand.
// Each value is initially zero and can be set to something else only once.
// Setting and getting values from multiple threads is safe w/o extra locking.
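//
// Illustrative sketch (not part of the original header); the instantiation
// parameters below are made-up example values:
//   TwoLevelByteMap<1 << 12, 1 << 8> m;  // covers 2^20 indices
//   m.TestOnlyInit();
//   m.set(42, 7);   // each index may be set only once
//   u8 v = m[42];   // lock-free read: 7 here, 0 for indices never set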
template <u64 kSize1, u64 kSize2, class MapUnmapCallback = NoOpMapUnmapCallback>
class TwoLevelByteMap {
  void TestOnlyInit() {
    internal_memset(map1_, 0, sizeof(map1_));

  void TestOnlyUnmap() {
    for (uptr i = 0; i < kSize1; i++) {
      MapUnmapCallback().OnUnmap(reinterpret_cast<uptr>(p), kSize2);
      UnmapOrDie(p, kSize2);

  uptr size() const { return kSize1 * kSize2; }
  uptr size1() const { return kSize1; }
  uptr size2() const { return kSize2; }

  void set(uptr idx, u8 val) {
    CHECK_LT(idx, kSize1 * kSize2);
    u8 *map2 = GetOrCreate(idx / kSize2);
    CHECK_EQ(0U, map2[idx % kSize2]);
    map2[idx % kSize2] = val;

  u8 operator[] (uptr idx) const {
    CHECK_LT(idx, kSize1 * kSize2);
    u8 *map2 = Get(idx / kSize2);
    return map2[idx % kSize2];

  u8 *Get(uptr idx) const {
    CHECK_LT(idx, kSize1);
    return reinterpret_cast<u8 *>(
        atomic_load(&map1_[idx], memory_order_acquire));

  u8 *GetOrCreate(uptr idx) {
      SpinMutexLock l(&mu_);
      if (!(res = Get(idx))) {
        res = (u8*)MmapOrDie(kSize2, "TwoLevelByteMap");
        MapUnmapCallback().OnMap(reinterpret_cast<uptr>(res), kSize2);
        atomic_store(&map1_[idx], reinterpret_cast<uptr>(res),
                     memory_order_release);

  atomic_uintptr_t map1_[kSize1];

// SizeClassAllocator32 -- allocator for 32-bit address space.
// This allocator can theoretically be used on 64-bit arch, but there it is less
// efficient than SizeClassAllocator64.
//
// [kSpaceBeg, kSpaceBeg + kSpaceSize) is the range of addresses which can
// be returned by MmapOrDie().
//
// a result of a single call to MmapAlignedOrDie(kRegionSize, kRegionSize).
// Since the regions are aligned by kRegionSize, there are exactly
// kNumPossibleRegions possible regions in the address space and so we keep
// a ByteMap possible_regions to store the size classes of each Region.
// 0 size class means the region is not used by the allocator.
//
// One Region is used to allocate chunks of a single size class.
// A Region looks like this:
// UserChunk1 .. UserChunkN <gap> MetaChunkN .. MetaChunk1
//
// In order to avoid false sharing the objects of this class should be
// cache-line aligned.
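//
// Illustrative sketch (not part of the original header): with, say,
// kRegionSizeLog == 20 (1 MB regions, an example value, not a default),
//   region id   = p >> 20
//   class_id    = possible_regions[region id]      // 0 means "not ours"
//   block begin = region_beg + (offset / size) * size
// which is what ComputeRegionId(), GetSizeClass() and GetBlockBegin() below do.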
template <const uptr kSpaceBeg, const u64 kSpaceSize,
          const uptr kMetadataSize, class SizeClassMap,
          const uptr kRegionSizeLog,
          class MapUnmapCallback = NoOpMapUnmapCallback>
class SizeClassAllocator32 {
  typedef typename SizeClassMap::TransferBatch Batch;
  typedef SizeClassAllocator32<kSpaceBeg, kSpaceSize, kMetadataSize,
      SizeClassMap, kRegionSizeLog, ByteMap, MapUnmapCallback> ThisT;
  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;

    possible_regions.TestOnlyInit();
    internal_memset(size_class_info_array, 0, sizeof(size_class_info_array));

  void *MapWithCallback(uptr size) {
    size = RoundUpTo(size, GetPageSizeCached());
    void *res = MmapOrDie(size, "SizeClassAllocator32");
    MapUnmapCallback().OnMap((uptr)res, size);

  void UnmapWithCallback(uptr beg, uptr size) {
    MapUnmapCallback().OnUnmap(beg, size);
    UnmapOrDie(reinterpret_cast<void *>(beg), size);

  static bool CanAllocate(uptr size, uptr alignment) {
    return size <= SizeClassMap::kMaxSize &&
      alignment <= SizeClassMap::kMaxSize;

  void *GetMetaData(const void *p) {
    CHECK(PointerIsMine(p));
    uptr mem = reinterpret_cast<uptr>(p);
    uptr beg = ComputeRegionBeg(mem);
    uptr size = SizeClassMap::Size(GetSizeClass(p));
    u32 offset = mem - beg;
    uptr n = offset / (u32)size;  // 32-bit division
    uptr meta = (beg + kRegionSize) - (n + 1) * kMetadataSize;
    return reinterpret_cast<void*>(meta);

  NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
    CHECK_LT(class_id, kNumClasses);
    SizeClassInfo *sci = GetSizeClassInfo(class_id);
    SpinMutexLock l(&sci->mutex);
    if (sci->free_list.empty())
      PopulateFreeList(stat, c, sci, class_id);
    CHECK(!sci->free_list.empty());
    Batch *b = sci->free_list.front();
    sci->free_list.pop_front();

  NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
    CHECK_LT(class_id, kNumClasses);
    SizeClassInfo *sci = GetSizeClassInfo(class_id);
    SpinMutexLock l(&sci->mutex);
    CHECK_GT(b->count, 0);
    sci->free_list.push_front(b);

  bool PointerIsMine(const void *p) {
    return GetSizeClass(p) != 0;

  uptr GetSizeClass(const void *p) {
    return possible_regions[ComputeRegionId(reinterpret_cast<uptr>(p))];

  void *GetBlockBegin(const void *p) {
    CHECK(PointerIsMine(p));
    uptr mem = reinterpret_cast<uptr>(p);
    uptr beg = ComputeRegionBeg(mem);
    uptr size = SizeClassMap::Size(GetSizeClass(p));
    u32 offset = mem - beg;
    u32 n = offset / (u32)size;  // 32-bit division
    uptr res = beg + (n * (u32)size);
    return reinterpret_cast<void*>(res);

  uptr GetActuallyAllocatedSize(void *p) {
    CHECK(PointerIsMine(p));
    return SizeClassMap::Size(GetSizeClass(p));

  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }

  uptr TotalMemoryUsed() {
    // No need to lock here.
    for (uptr i = 0; i < kNumPossibleRegions; i++)
      if (possible_regions[i])

  void TestOnlyUnmap() {
    for (uptr i = 0; i < kNumPossibleRegions; i++)
      if (possible_regions[i])
        UnmapWithCallback((i * kRegionSize), kRegionSize);

  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
  // introspection API.
    for (uptr i = 0; i < kNumClasses; i++) {
      GetSizeClassInfo(i)->mutex.Lock();

    for (int i = kNumClasses - 1; i >= 0; i--) {
      GetSizeClassInfo(i)->mutex.Unlock();

  // Iterate over all existing chunks.
  // The allocator must be locked when calling this function.
  void ForEachChunk(ForEachChunkCallback callback, void *arg) {
    for (uptr region = 0; region < kNumPossibleRegions; region++)
      if (possible_regions[region]) {
        uptr chunk_size = SizeClassMap::Size(possible_regions[region]);
        uptr max_chunks_in_region = kRegionSize / (chunk_size + kMetadataSize);
        uptr region_beg = region * kRegionSize;
        for (uptr chunk = region_beg;
             chunk < region_beg + max_chunks_in_region * chunk_size;
             chunk += chunk_size) {
          // Too slow: CHECK_EQ((void *)chunk, GetBlockBegin((void *)chunk));
          callback(chunk, arg);

  static uptr AdditionalSize() {

  typedef SizeClassMap SizeClassMapT;
  static const uptr kNumClasses = SizeClassMap::kNumClasses;

  static const uptr kRegionSize = 1 << kRegionSizeLog;
  static const uptr kNumPossibleRegions = kSpaceSize / kRegionSize;

  struct SizeClassInfo {
    IntrusiveList<Batch> free_list;
    char padding[kCacheLineSize - sizeof(uptr) - sizeof(IntrusiveList<Batch>)];

  COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);

  uptr ComputeRegionId(uptr mem) {
    uptr res = mem >> kRegionSizeLog;
    CHECK_LT(res, kNumPossibleRegions);

  uptr ComputeRegionBeg(uptr mem) {
    return mem & ~(kRegionSize - 1);

  uptr AllocateRegion(AllocatorStats *stat, uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
                                      "SizeClassAllocator32"));
    MapUnmapCallback().OnMap(res, kRegionSize);
    stat->Add(AllocatorStatMapped, kRegionSize);
    CHECK_EQ(0U, (res & (kRegionSize - 1)));
    possible_regions.set(ComputeRegionId(res), static_cast<u8>(class_id));

  SizeClassInfo *GetSizeClassInfo(uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    return &size_class_info_array[class_id];

  void PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
                        SizeClassInfo *sci, uptr class_id) {
    uptr size = SizeClassMap::Size(class_id);
    uptr reg = AllocateRegion(stat, class_id);
    uptr n_chunks = kRegionSize / (size + kMetadataSize);
    uptr max_count = SizeClassMap::MaxCached(class_id);
    for (uptr i = reg; i < reg + n_chunks * size; i += size) {
        if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
          b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));

      b->batch[b->count++] = (void*)i;
      if (b->count == max_count) {
        CHECK_GT(b->count, 0);
        sci->free_list.push_back(b);

      CHECK_GT(b->count, 0);
      sci->free_list.push_back(b);

  ByteMap possible_regions;
  SizeClassInfo size_class_info_array[kNumClasses];

// Objects of this type should be used as local caches for SizeClassAllocator64
// or SizeClassAllocator32. Since the typical use of this class is to have one
// object per thread in TLS, it has to be POD.
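//
// Illustrative sketch (not part of the original header): the typical
// per-thread pattern. PrimaryAlloc, allocator, global_stats, cache, size and
// p are hypothetical names, not identifiers defined in this file:
//   static PrimaryAlloc allocator;  // e.g. a SizeClassAllocator64 instance
//   THREADLOCAL SizeClassAllocatorLocalCache<PrimaryAlloc> cache;
//   cache.Init(&global_stats);
//   void *p = cache.Allocate(&allocator, allocator.ClassID(size));
//   cache.Deallocate(&allocator, allocator.GetSizeClass(p), p);
//   cache.Drain(&allocator);        // e.g. on thread teardown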
template<class SizeClassAllocator>
struct SizeClassAllocatorLocalCache {
  typedef SizeClassAllocator Allocator;
  static const uptr kNumClasses = SizeClassAllocator::kNumClasses;

  void Init(AllocatorGlobalStats *s) {

      s->Register(&stats_);

  void Destroy(SizeClassAllocator *allocator, AllocatorGlobalStats *s) {

      s->Unregister(&stats_);

  void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
    CHECK_NE(class_id, 0UL);
    CHECK_LT(class_id, kNumClasses);
    stats_.Add(AllocatorStatAllocated, SizeClassMap::Size(class_id));
    PerClass *c = &per_class_[class_id];
    if (UNLIKELY(c->count == 0))
      Refill(allocator, class_id);
    void *res = c->batch[--c->count];
    PREFETCH(c->batch[c->count - 1]);

  void Deallocate(SizeClassAllocator *allocator, uptr class_id, void *p) {
    CHECK_NE(class_id, 0UL);
    CHECK_LT(class_id, kNumClasses);
    // If the first allocator call on a new thread is a deallocation, then
    // max_count will be zero, leading to check failure.
    stats_.Sub(AllocatorStatAllocated, SizeClassMap::Size(class_id));
    PerClass *c = &per_class_[class_id];
    CHECK_NE(c->max_count, 0UL);
    if (UNLIKELY(c->count == c->max_count))
      Drain(allocator, class_id);
    c->batch[c->count++] = p;

  void Drain(SizeClassAllocator *allocator) {
    for (uptr class_id = 0; class_id < kNumClasses; class_id++) {
      PerClass *c = &per_class_[class_id];

        Drain(allocator, class_id);

  typedef typename SizeClassAllocator::SizeClassMapT SizeClassMap;
  typedef typename SizeClassMap::TransferBatch Batch;

    void *batch[2 * SizeClassMap::kMaxNumCached];

  PerClass per_class_[kNumClasses];
  AllocatorStats stats_;

    if (per_class_[1].max_count)

    for (uptr i = 0; i < kNumClasses; i++) {
      PerClass *c = &per_class_[i];
      c->max_count = 2 * SizeClassMap::MaxCached(i);

  NOINLINE void Refill(SizeClassAllocator *allocator, uptr class_id) {
    PerClass *c = &per_class_[class_id];
    Batch *b = allocator->AllocateBatch(&stats_, this, class_id);
    CHECK_GT(b->count, 0);
    for (uptr i = 0; i < b->count; i++)
      c->batch[i] = b->batch[i];

    if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
      Deallocate(allocator, SizeClassMap::ClassID(sizeof(Batch)), b);

  NOINLINE void Drain(SizeClassAllocator *allocator, uptr class_id) {
    PerClass *c = &per_class_[class_id];

    if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
      b = (Batch*)Allocate(allocator, SizeClassMap::ClassID(sizeof(Batch)));

      b = (Batch*)c->batch[0];
    uptr cnt = Min(c->max_count / 2, c->count);
    for (uptr i = 0; i < cnt; i++) {
      b->batch[i] = c->batch[i];
      c->batch[i] = c->batch[i + c->max_count / 2];

    CHECK_GT(b->count, 0);
    allocator->DeallocateBatch(&stats_, class_id, b);

// This class can (de)allocate only large chunks of memory using mmap/unmap.
// The main purpose of this allocator is to cover large and rare allocation
// sizes not covered by more efficient allocators (e.g. SizeClassAllocator64).
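//
// Illustrative sketch (not part of the original header): each allocation is an
// independent mmap of roughly RoundUpTo(size, page_size) + page_size bytes
// (plus padding for large alignments). The extra leading page holds a Header
// (map_beg, map_size, size, chunk_idx), so for a returned pointer p the header
// is at p - page_size and the rest of that page serves as user metadata; see
// GetHeader() and GetMetaData() below.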
template <class MapUnmapCallback = NoOpMapUnmapCallback>
class LargeMmapAllocator {
  void InitLinkerInitialized(bool may_return_null) {
    page_size_ = GetPageSizeCached();
    atomic_store(&may_return_null_, may_return_null, memory_order_relaxed);

  void Init(bool may_return_null) {
    internal_memset(this, 0, sizeof(*this));
    InitLinkerInitialized(may_return_null);

  void *Allocate(AllocatorStats *stat, uptr size, uptr alignment) {
    CHECK(IsPowerOfTwo(alignment));
    uptr map_size = RoundUpMapSize(size);
    if (alignment > page_size_)
      map_size += alignment;

    if (map_size < size)
      return ReturnNullOrDie();
    uptr map_beg = reinterpret_cast<uptr>(
        MmapOrDie(map_size, "LargeMmapAllocator"));
    CHECK(IsAligned(map_beg, page_size_));
    MapUnmapCallback().OnMap(map_beg, map_size);
    uptr map_end = map_beg + map_size;
    uptr res = map_beg + page_size_;
    if (res & (alignment - 1))  // Align.
      res += alignment - (res & (alignment - 1));
    CHECK(IsAligned(res, alignment));
    CHECK(IsAligned(res, page_size_));
    CHECK_GE(res + size, map_beg);
    CHECK_LE(res + size, map_end);
    Header *h = GetHeader(res);
    h->map_beg = map_beg;
    h->map_size = map_size;
    uptr size_log = MostSignificantSetBitIndex(map_size);
    CHECK_LT(size_log, ARRAY_SIZE(stats.by_size_log));

      SpinMutexLock l(&mutex_);
      uptr idx = n_chunks_++;
      chunks_sorted_ = false;
      CHECK_LT(idx, kMaxNumChunks);
      stats.currently_allocated += map_size;
      stats.max_allocated = Max(stats.max_allocated, stats.currently_allocated);
      stats.by_size_log[size_log]++;
      stat->Add(AllocatorStatAllocated, map_size);
      stat->Add(AllocatorStatMapped, map_size);

    return reinterpret_cast<void*>(res);

  void *ReturnNullOrDie() {
    if (atomic_load(&may_return_null_, memory_order_acquire))

    ReportAllocatorCannotReturnNull();

  void SetMayReturnNull(bool may_return_null) {
    atomic_store(&may_return_null_, may_return_null, memory_order_release);

  void Deallocate(AllocatorStats *stat, void *p) {
    Header *h = GetHeader(p);

      SpinMutexLock l(&mutex_);
      uptr idx = h->chunk_idx;
      CHECK_EQ(chunks_[idx], h);
      CHECK_LT(idx, n_chunks_);
      chunks_[idx] = chunks_[n_chunks_ - 1];
      chunks_[idx]->chunk_idx = idx;
      chunks_sorted_ = false;
      stats.currently_allocated -= h->map_size;
      stat->Sub(AllocatorStatAllocated, h->map_size);
      stat->Sub(AllocatorStatMapped, h->map_size);

    MapUnmapCallback().OnUnmap(h->map_beg, h->map_size);
    UnmapOrDie(reinterpret_cast<void*>(h->map_beg), h->map_size);

  uptr TotalMemoryUsed() {
    SpinMutexLock l(&mutex_);
    for (uptr i = 0; i < n_chunks_; i++) {
      Header *h = chunks_[i];
      CHECK_EQ(h->chunk_idx, i);
      res += RoundUpMapSize(h->size);

  bool PointerIsMine(const void *p) {
    return GetBlockBegin(p) != nullptr;

  uptr GetActuallyAllocatedSize(void *p) {
    return RoundUpTo(GetHeader(p)->size, page_size_);

  // At least page_size_/2 metadata bytes are available.
  void *GetMetaData(const void *p) {
    // Too slow: CHECK_EQ(p, GetBlockBegin(p));
    if (!IsAligned(reinterpret_cast<uptr>(p), page_size_)) {
      Printf("%s: bad pointer %p\n", SanitizerToolName, p);
      CHECK(IsAligned(reinterpret_cast<uptr>(p), page_size_));

    return GetHeader(p) + 1;

  void *GetBlockBegin(const void *ptr) {
    uptr p = reinterpret_cast<uptr>(ptr);
    SpinMutexLock l(&mutex_);
    uptr nearest_chunk = 0;
    // Cache-friendly linear search.
    for (uptr i = 0; i < n_chunks_; i++) {
      uptr ch = reinterpret_cast<uptr>(chunks_[i]);
      if (p < ch) continue;  // p is to the left of this chunk, skip it.
      if (p - ch < p - nearest_chunk)

    Header *h = reinterpret_cast<Header *>(nearest_chunk);
    CHECK_GE(nearest_chunk, h->map_beg);
    CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
    CHECK_LE(nearest_chunk, p);
    if (h->map_beg + h->map_size <= p)

  // This function does the same as GetBlockBegin, but is much faster.
  // Must be called with the allocator locked.
  void *GetBlockBeginFastLocked(void *ptr) {
    mutex_.CheckLocked();
    uptr p = reinterpret_cast<uptr>(ptr);
    if (!n) return nullptr;
    if (!chunks_sorted_) {
      // Do one-time sort. chunks_sorted_ is reset in Allocate/Deallocate.
      SortArray(reinterpret_cast<uptr*>(chunks_), n);
      for (uptr i = 0; i < n; i++)
        chunks_[i]->chunk_idx = i;
      chunks_sorted_ = true;
      min_mmap_ = reinterpret_cast<uptr>(chunks_[0]);
      max_mmap_ = reinterpret_cast<uptr>(chunks_[n - 1]) +
          chunks_[n - 1]->map_size;

    if (p < min_mmap_ || p >= max_mmap_)

    uptr beg = 0, end = n - 1;
    // This loop is a log(n) lower_bound. It does not check for the exact match
    // to avoid expensive cache-thrashing loads.
    while (end - beg >= 2) {
      uptr mid = (beg + end) / 2;  // Invariant: mid >= beg + 1
      if (p < reinterpret_cast<uptr>(chunks_[mid]))
        end = mid - 1;  // We are not interested in chunks_[mid].

        beg = mid;  // chunks_[mid] may still be what we want.

    CHECK_EQ(beg + 1, end);
    // There are 2 chunks left, choose one.
    if (p >= reinterpret_cast<uptr>(chunks_[end]))

    Header *h = chunks_[beg];
    if (h->map_beg + h->map_size <= p || p < h->map_beg)

    Printf("Stats: LargeMmapAllocator: allocated %zd times, "
           "remains %zd (%zd K) max %zd M; by size logs: ",
           stats.n_allocs, stats.n_allocs - stats.n_frees,
           stats.currently_allocated >> 10, stats.max_allocated >> 20);
    for (uptr i = 0; i < ARRAY_SIZE(stats.by_size_log); i++) {
      uptr c = stats.by_size_log[i];
      Printf("%zd:%zd; ", i, c);

  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
  // introspection API.

  void ForceUnlock() {

  // Iterate over all existing chunks.
  // The allocator must be locked when calling this function.
  void ForEachChunk(ForEachChunkCallback callback, void *arg) {
    for (uptr i = 0; i < n_chunks_; i++)
      callback(reinterpret_cast<uptr>(GetUser(chunks_[i])), arg);

  static const int kMaxNumChunks = 1 << FIRST_32_SECOND_64(15, 18);

  Header *GetHeader(uptr p) {
    CHECK(IsAligned(p, page_size_));
    return reinterpret_cast<Header*>(p - page_size_);

  Header *GetHeader(const void *p) {
    return GetHeader(reinterpret_cast<uptr>(p));

  void *GetUser(Header *h) {
    CHECK(IsAligned((uptr)h, page_size_));
    return reinterpret_cast<void*>(reinterpret_cast<uptr>(h) + page_size_);

  uptr RoundUpMapSize(uptr size) {
    return RoundUpTo(size, page_size_) + page_size_;

  Header *chunks_[kMaxNumChunks];
  uptr min_mmap_, max_mmap_;
  bool chunks_sorted_;
    uptr n_allocs, n_frees, currently_allocated, max_allocated, by_size_log[64];
  atomic_uint8_t may_return_null_;

// This class implements a complete memory allocator by using two
// internal allocators:
// PrimaryAllocator is efficient, but may not allocate some sizes (alignments).
// When allocating 2^x bytes it should return 2^x aligned chunk.
// PrimaryAllocator is used via a local AllocatorCache.
// SecondaryAllocator can allocate anything, but is not efficient.
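//
// Illustrative sketch (not part of the original header): tools compose the
// pieces roughly like this. kAllocatorSpace, kAllocatorSize and the metadata
// size are placeholders, not values prescribed by this header:
//   typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, 16,
//                                DefaultSizeClassMap> PrimaryAllocator;
//   typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
//   typedef LargeMmapAllocator<> SecondaryAllocator;
//   typedef CombinedAllocator<PrimaryAllocator, AllocatorCache,
//                             SecondaryAllocator> Allocator;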
template <class PrimaryAllocator, class AllocatorCache,
          class SecondaryAllocator>  // NOLINT
class CombinedAllocator {
  void InitCommon(bool may_return_null) {
    atomic_store(&may_return_null_, may_return_null, memory_order_relaxed);

  void InitLinkerInitialized(bool may_return_null) {
    secondary_.InitLinkerInitialized(may_return_null);
    stats_.InitLinkerInitialized();
    InitCommon(may_return_null);

  void Init(bool may_return_null) {
    secondary_.Init(may_return_null);
    InitCommon(may_return_null);

  void *Allocate(AllocatorCache *cache, uptr size, uptr alignment,
                 bool cleared = false, bool check_rss_limit = false) {
    // Returning 0 on malloc(0) may break a lot of code.
    if (size + alignment < size)
      return ReturnNullOrDie();
    if (check_rss_limit && RssLimitIsExceeded())
      return ReturnNullOrDie();

      size = RoundUpTo(size, alignment);
    bool from_primary = primary_.CanAllocate(size, alignment);

      res = cache->Allocate(&primary_, primary_.ClassID(size));

      res = secondary_.Allocate(&stats_, size, alignment);
      CHECK_EQ(reinterpret_cast<uptr>(res) & (alignment - 1), 0);
    if (cleared && res && from_primary)
      internal_bzero_aligned16(res, RoundUpTo(size, 16));

  bool MayReturnNull() const {
    return atomic_load(&may_return_null_, memory_order_acquire);

  void *ReturnNullOrDie() {
    if (MayReturnNull())

    ReportAllocatorCannotReturnNull();

  void SetMayReturnNull(bool may_return_null) {
    secondary_.SetMayReturnNull(may_return_null);
    atomic_store(&may_return_null_, may_return_null, memory_order_release);

  bool RssLimitIsExceeded() {
    return atomic_load(&rss_limit_is_exceeded_, memory_order_acquire);

  void SetRssLimitIsExceeded(bool rss_limit_is_exceeded) {
    atomic_store(&rss_limit_is_exceeded_, rss_limit_is_exceeded,
                 memory_order_release);

  void Deallocate(AllocatorCache *cache, void *p) {
    if (primary_.PointerIsMine(p))
      cache->Deallocate(&primary_, primary_.GetSizeClass(p), p);

      secondary_.Deallocate(&stats_, p);

  void *Reallocate(AllocatorCache *cache, void *p, uptr new_size,

      return Allocate(cache, new_size, alignment);

      Deallocate(cache, p);

    CHECK(PointerIsMine(p));
    uptr old_size = GetActuallyAllocatedSize(p);
    uptr memcpy_size = Min(new_size, old_size);
    void *new_p = Allocate(cache, new_size, alignment);

      internal_memcpy(new_p, p, memcpy_size);
    Deallocate(cache, p);

  bool PointerIsMine(void *p) {
    if (primary_.PointerIsMine(p))

    return secondary_.PointerIsMine(p);

  bool FromPrimary(void *p) {
    return primary_.PointerIsMine(p);

  void *GetMetaData(const void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetMetaData(p);
    return secondary_.GetMetaData(p);

  void *GetBlockBegin(const void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetBlockBegin(p);
    return secondary_.GetBlockBegin(p);

  // This function does the same as GetBlockBegin, but is much faster.
  // Must be called with the allocator locked.
  void *GetBlockBeginFastLocked(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetBlockBegin(p);
    return secondary_.GetBlockBeginFastLocked(p);

  uptr GetActuallyAllocatedSize(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetActuallyAllocatedSize(p);
    return secondary_.GetActuallyAllocatedSize(p);

  uptr TotalMemoryUsed() {
    return primary_.TotalMemoryUsed() + secondary_.TotalMemoryUsed();

  void TestOnlyUnmap() { primary_.TestOnlyUnmap(); }

  void InitCache(AllocatorCache *cache) {
    cache->Init(&stats_);

  void DestroyCache(AllocatorCache *cache) {
    cache->Destroy(&primary_, &stats_);

  void SwallowCache(AllocatorCache *cache) {
    cache->Drain(&primary_);

  void GetStats(AllocatorStatCounters s) const {

    primary_.PrintStats();
    secondary_.PrintStats();

  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
  // introspection API.
    primary_.ForceLock();
    secondary_.ForceLock();

  void ForceUnlock() {
    secondary_.ForceUnlock();
    primary_.ForceUnlock();

  // Iterate over all existing chunks.
  // The allocator must be locked when calling this function.
  void ForEachChunk(ForEachChunkCallback callback, void *arg) {
    primary_.ForEachChunk(callback, arg);
    secondary_.ForEachChunk(callback, arg);

  PrimaryAllocator primary_;
  SecondaryAllocator secondary_;
  AllocatorGlobalStats stats_;
  atomic_uint8_t may_return_null_;
  atomic_uint8_t rss_limit_is_exceeded_;

// Returns true if calloc(size, n) should return 0 due to overflow in size*n.
bool CallocShouldReturnNullDueToOverflow(uptr size, uptr n);

}  // namespace __sanitizer

#endif  // SANITIZER_ALLOCATOR_H