]>
Commit | Line | Data |
---|---|---|
3157f602 XL |
1 | /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\ |
2 | |* | |
3 | |* The LLVM Compiler Infrastructure | |
4 | |* | |
5 | |* This file is distributed under the University of Illinois Open Source | |
6 | |* License. See LICENSE.TXT for details. | |
7 | |* | |
8 | \*===----------------------------------------------------------------------===*/ | |
9 | ||
3157f602 XL |
10 | #include <limits.h> |
11 | #include <stdio.h> | |
12 | #include <stdlib.h> | |
13 | #include <string.h> | |
2c00a5a8 XL |
14 | |
15 | #include "InstrProfiling.h" | |
16 | #include "InstrProfilingInternal.h" | |
17 | #include "InstrProfilingUtil.h" | |
18 | ||
3157f602 XL |
19 | #define INSTR_PROF_VALUE_PROF_DATA |
20 | #define INSTR_PROF_COMMON_API_IMPL | |
21 | #include "InstrProfData.inc" | |
22 | ||
5bcae85e SL |
/* 1 while value counter nodes come from the static pool; cleared by
 * allocateValueProfileCounters once heap allocation takes over. */
static int hasStaticCounters = 1;
/* Number of "out of static vnodes" warnings emitted so far (capped below). */
static int OutOfNodesWarnings = 0;
/* Set when the per-site value cap was overridden, either via the
 * LLVM_VP_MAX_NUM_VALS_PER_SITE env var or lprofSetMaxValsPerSite. */
static int hasNonDefaultValsPerSite = 0;
#define INSTR_PROF_MAX_VP_WARNS 10
#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 16
#define INSTR_PROF_VNODE_POOL_SIZE 1024

#ifndef _MSC_VER
/* A shared static pool in addition to the vnodes statically
 * allocated by the compiler. */
COMPILER_RT_VISIBILITY ValueProfNode
    lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
        COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME_STR);
#endif

/* Current cap on the number of distinct values tracked per value site. */
COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
    INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;
40 | ||
41 | COMPILER_RT_VISIBILITY void lprofSetupValueProfiler() { | |
42 | const char *Str = 0; | |
43 | Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE"); | |
44 | if (Str && Str[0]) { | |
45 | VPMaxNumValsPerSite = atoi(Str); | |
46 | hasNonDefaultValsPerSite = 1; | |
3157f602 | 47 | } |
5bcae85e SL |
48 | if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE) |
49 | VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE; | |
50 | } | |
51 | ||
/* Programmatically set the cap on distinct values tracked per value site.
 * Marks the cap as non-default so allocateValueProfileCounters will not
 * raise it back to INSTR_PROF_MAX_NUM_VAL_PER_SITE. */
COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
  VPMaxNumValsPerSite = MaxVals;
  hasNonDefaultValsPerSite = 1;
}
3157f602 XL |
56 | |
/* This method is only used in value profiler mock testing. */
/* Overwrites Data's site count for ValueKind. NOTE(review): the cast
 * through uint16_t* implies NumValueSites is const-qualified in
 * __llvm_profile_data — verify against the struct declaration. */
COMPILER_RT_VISIBILITY void
__llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
                                   uint32_t ValueKind, uint16_t NumValueSites) {
  *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
}
63 | ||
64 | /* This method is only used in value profiler mock testing. */ | |
65 | COMPILER_RT_VISIBILITY const __llvm_profile_data * | |
66 | __llvm_profile_iterate_data(const __llvm_profile_data *Data) { | |
67 | return Data + 1; | |
68 | } | |
69 | ||
70 | /* This method is only used in value profiler mock testing. */ | |
71 | COMPILER_RT_VISIBILITY void * | |
72 | __llvm_get_function_addr(const __llvm_profile_data *Data) { | |
73 | return Data->FunctionPointer; | |
74 | } | |
75 | ||
/* Allocate an array that holds the pointers to the linked lists of
 * value profile counter nodes. The number of elements of the array
 * is the total number of value profile sites instrumented. Returns
 * 0 if allocation fails.
 */

static int allocateValueProfileCounters(__llvm_profile_data *Data) {
  uint64_t NumVSites = 0;
  uint32_t VKI;

  /* This function will never be called when value site array is allocated
     statically at compile time. */
  hasStaticCounters = 0;
  /* When dynamic allocation is enabled, allow tracking the max number of
   * values allowed. */
  if (!hasNonDefaultValsPerSite)
    VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;

  /* Sum the instrumented sites across all value kinds. */
  for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
    NumVSites += Data->NumValueSites[VKI];

  /* One list head per site, zero-initialized so every list starts empty. */
  ValueProfNode **Mem =
      (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
  if (!Mem)
    return 0;
  /* Publish the array with a CAS; if another thread already installed
   * one, discard ours and report failure to the caller. */
  if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {
    free(Mem);
    return 0;
  }
  return 1;
}
107 | ||
5bcae85e SL |
/* Obtain one ValueProfNode: from the heap when counters are dynamically
 * allocated, otherwise from the static vnode pool bounded by
 * [CurrentVNode, EndVNode). Returns 0 (with a rate-limited warning) when
 * the static pool is exhausted. Data/Index/Value are currently unused. */
static ValueProfNode *allocateOneNode(__llvm_profile_data *Data, uint32_t Index,
                                      uint64_t Value) {
  ValueProfNode *Node;

  if (!hasStaticCounters)
    return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));

  /* Early check to avoid value wrapping around. */
  if (CurrentVNode + 1 > EndVNode) {
    if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
      PROF_WARN("Unable to track new values: %s. "
                " Consider using option -mllvm -vp-counters-per-site=<n> to "
                "allocate more"
                " value profile counters at compile time. \n",
                "Running out of static counters");
    }
    return 0;
  }
  /* Atomically claim the next slot in the static pool. */
  Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
  /* Due to section padding, EndVNode may point to a byte which is one past
   * an incomplete VNode, so we need to skip the last incomplete node. */
  if (Node + 1 > EndVNode)
    return 0;

  return Node;
}
134 | ||
3157f602 XL |
/* Record TargetValue at value site CounterIndex of the function whose
 * profile data is Data. Each site keeps a linked list of (value, count)
 * nodes capped at VPMaxNumValsPerSite entries; list insertion is
 * lock-free via compare-and-swap, so a concurrent caller may lose the
 * race and its node is freed/abandoned rather than retried. */
COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
                                 uint32_t CounterIndex) {
  __llvm_profile_data *PData = (__llvm_profile_data *)Data;
  if (!PData)
    return;

  /* Lazily allocate the per-site list-head array on first use. */
  if (!PData->Values) {
    if (!allocateValueProfileCounters(PData))
      return;
  }

  ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
  ValueProfNode *PrevVNode = NULL;
  ValueProfNode *MinCountVNode = NULL;
  ValueProfNode *CurVNode = ValueCounters[CounterIndex];
  uint64_t MinCount = UINT64_MAX;

  uint8_t VDataCount = 0;
  /* Walk the site's list: bump the count on a value hit; otherwise track
   * the minimum-count node (eviction candidate) and the list tail. */
  while (CurVNode) {
    if (TargetValue == CurVNode->Value) {
      CurVNode->Count++;
      return;
    }
    if (CurVNode->Count < MinCount) {
      MinCount = CurVNode->Count;
      MinCountVNode = CurVNode;
    }
    PrevVNode = CurVNode;
    CurVNode = CurVNode->Next;
    ++VDataCount;
  }

  if (VDataCount >= VPMaxNumValsPerSite) {
    /* Bump down the min count node's count. If it reaches 0,
     * evict it. This eviction/replacement policy makes hot
     * targets more sticky while cold targets less so. In other
     * words, it makes it less likely for the hot targets to be
     * prematurely evicted during warmup/establishment period,
     * when their counts are still low. In a special case when
     * the number of values tracked is reduced to only one, this
     * policy will guarantee that the dominating target with >50%
     * total count will survive in the end. Note that this scheme
     * allows the runtime to track the min count node in an adaptive
     * manner. It can correct previous mistakes and eventually
     * lock on a cold target that is already in stable state.
     *
     * In very rare cases, this replacement scheme may still lead
     * to target loss. For instance, out of \c N value slots, \c N-1
     * slots are occupied by luke warm targets during the warmup
     * period and the remaining one slot is competed by two or more
     * very hot targets. If those hot targets occur in an interleaved
     * way, none of them will survive (gain enough weight to throw out
     * other established entries) due to the ping-pong effect.
     * To handle this situation, user can choose to increase the max
     * number of tracked values per value site. Alternatively, a more
     * expensive eviction mechanism can be implemented. It requires
     * the runtime to track the total number of evictions per-site.
     * When the total number of evictions reaches certain threshold,
     * the runtime can wipe out more than one lowest count entries
     * to give space for hot targets.
     */
    if (!MinCountVNode->Count || !(--MinCountVNode->Count)) {
      CurVNode = MinCountVNode;
      CurVNode->Value = TargetValue;
      CurVNode->Count++;
    }
    return;
  }

  /* Room for a new value: grab a node and fill it in before linking. */
  CurVNode = allocateOneNode(PData, CounterIndex, TargetValue);
  if (!CurVNode)
    return;
  CurVNode->Value = TargetValue;
  CurVNode->Count++;

  uint32_t Success = 0;
  /* Link the node with a CAS: either as the list head (site was empty)
   * or after the observed tail. A lost race leaves Success == 0. */
  if (!ValueCounters[CounterIndex])
    Success =
        COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
  else if (PrevVNode && !PrevVNode->Next)
    Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);

  /* Heap nodes can be freed on a lost race; static-pool slots are
   * simply abandoned (hasStaticCounters set). */
  if (!Success && !hasStaticCounters) {
    free(CurVNode);
    return;
  }
}
223 | ||
2c00a5a8 XL |
224 | /* |
225 | * The target values are partitioned into multiple regions/ranges. There is one | |
226 | * contiguous region which is precise -- every value in the range is tracked | |
227 | * individually. A value outside the precise region will be collapsed into one | |
228 | * value depending on the region it falls in. | |
229 | * | |
230 | * There are three regions: | |
231 | * 1. (-inf, PreciseRangeStart) and (PreciseRangeLast, LargeRangeValue) belong | |
232 | * to one region -- all values here should be mapped to one value of | |
233 | * "PreciseRangeLast + 1". | |
234 | * 2. [PreciseRangeStart, PreciseRangeLast] | |
235 | * 3. Large values: [LargeValue, +inf) maps to one value of LargeValue. | |
236 | * | |
237 | * The range for large values is optional. The default value of INT64_MIN | |
238 | * indicates it is not specified. | |
239 | */ | |
240 | COMPILER_RT_VISIBILITY void __llvm_profile_instrument_range( | |
241 | uint64_t TargetValue, void *Data, uint32_t CounterIndex, | |
242 | int64_t PreciseRangeStart, int64_t PreciseRangeLast, int64_t LargeValue) { | |
243 | ||
244 | if (LargeValue != INT64_MIN && (int64_t)TargetValue >= LargeValue) | |
245 | TargetValue = LargeValue; | |
246 | else if ((int64_t)TargetValue < PreciseRangeStart || | |
247 | (int64_t)TargetValue > PreciseRangeLast) | |
248 | TargetValue = PreciseRangeLast + 1; | |
249 | ||
250 | __llvm_profile_instrument_target(TargetValue, Data, CounterIndex); | |
251 | } | |
252 | ||
5bcae85e SL |
/*
 * A wrapper struct that represents value profile runtime data.
 * Like InstrProfRecord class which is used by profiling host tools,
 * ValueProfRuntimeRecord also implements the abstract interfaces defined in
 * ValueProfRecordClosure so that the runtime data can be serialized using
 * shared C implementation.
 */
typedef struct ValueProfRuntimeRecord {
  /* Per-function profile data this record describes. */
  const __llvm_profile_data *Data;
  /* For each value kind, pointer into that kind's slice of the flat
   * per-site node-list array (set up by initializeValueProfRuntimeRecord). */
  ValueProfNode **NodesKind[IPVK_Last + 1];
  /* Per-kind arrays of per-site value counts (uint8_t, capped at
   * UCHAR_MAX when the record is initialized). */
  uint8_t **SiteCountArray;
} ValueProfRuntimeRecord;
265 | ||
266 | /* ValueProfRecordClosure Interface implementation. */ | |
267 | ||
268 | static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) { | |
269 | return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK]; | |
270 | } | |
271 | ||
272 | static uint32_t getNumValueDataRT(const void *R, uint32_t VK) { | |
273 | uint32_t S = 0, I; | |
274 | const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R; | |
275 | if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR) | |
276 | return 0; | |
277 | for (I = 0; I < Record->Data->NumValueSites[VK]; I++) | |
278 | S += Record->SiteCountArray[VK][I]; | |
279 | return S; | |
280 | } | |
281 | ||
282 | static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK, | |
283 | uint32_t S) { | |
284 | const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R; | |
285 | return Record->SiteCountArray[VK][S]; | |
286 | } | |
287 | ||
/* Singleton runtime record reused for each serialization pass. */
static ValueProfRuntimeRecord RTRecord;
/* Closure wired to RTRecord; entries left INSTR_PROF_NULLPTR are not
 * needed by the runtime side of the shared serializer. */
static ValueProfRecordClosure RTRecordClosure = {
    &RTRecord, INSTR_PROF_NULLPTR, /* GetNumValueKinds */
    getNumValueSitesRT, getNumValueDataRT, getNumValueDataForSiteRT,
    INSTR_PROF_NULLPTR, /* RemapValueData */
    INSTR_PROF_NULLPTR, /* GetValueForSite, */
    INSTR_PROF_NULLPTR  /* AllocValueProfData */
};
296 | ||
/* Populate the shared RTRecord from Data and the caller-provided
 * SiteCountArray: for each value kind, record where its sites start in
 * the flat node array and store each site's list length (clamped to
 * UCHAR_MAX since counts are uint8_t). Returns the number of value
 * kinds that have at least one instrumented site. */
static uint32_t
initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
                                 uint8_t *SiteCountArray[]) {
  unsigned I, J, S = 0, NumValueKinds = 0;
  ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
  RTRecord.Data = Data;
  RTRecord.SiteCountArray = SiteCountArray;
  for (I = 0; I <= IPVK_Last; I++) {
    uint16_t N = Data->NumValueSites[I];
    if (!N)
      continue;

    NumValueKinds++;

    /* Sites for kind I occupy Nodes[S .. S+N-1]. */
    RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
    for (J = 0; J < N; J++) {
      /* Compute value count for each site. */
      uint32_t C = 0;
      ValueProfNode *Site =
          Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
      while (Site) {
        C++;
        Site = Site->Next;
      }
      if (C > UCHAR_MAX)
        C = UCHAR_MAX;
      RTRecord.SiteCountArray[I][J] = C;
    }
    S += N;
  }
  return NumValueKinds;
}
3157f602 | 329 | |
5bcae85e SL |
330 | static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site, |
331 | InstrProfValueData *Dst, | |
332 | ValueProfNode *StartNode, uint32_t N) { | |
333 | unsigned I; | |
334 | ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site]; | |
335 | for (I = 0; I < N; I++) { | |
336 | Dst[I].Value = VNode->Value; | |
337 | Dst[I].Count = VNode->Count; | |
338 | VNode = VNode->Next; | |
3157f602 | 339 | } |
5bcae85e SL |
340 | return VNode; |
341 | } | |
342 | ||
/* Adapter for VPDataReaderType: total serialized size of the value
 * profile data described by the shared RTRecordClosure. */
static uint32_t getValueProfDataSizeWrapper(void) {
  return getValueProfDataSize(&RTRecordClosure);
}
346 | ||
/* Adapter for VPDataReaderType: forwards to getNumValueDataForSiteRT
 * using the shared RTRecord. */
static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
  return getNumValueDataForSiteRT(&RTRecord, VK, S);
}
350 | ||
/* Callback table handed to the profile writer via lprofGetVPDataReader. */
static VPDataReaderType TheVPDataReader = {
    initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
    getFirstValueProfRecord, getNumValueDataForSiteWrapper,
    getValueProfDataSizeWrapper, getNextNValueData};
3157f602 | 355 | |
5bcae85e SL |
356 | COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader() { |
357 | return &TheVPDataReader; | |
3157f602 | 358 | } |