2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
/* *************************************
*  Includes
***************************************/
16 #include <stdlib.h> /* malloc, free */
17 #include <string.h> /* memset */
18 #undef NDEBUG /* assert must not be disabled */
19 #include <assert.h> /* assert */
21 #include "timefn.h" /* UTIL_time_t, UTIL_getTime */
/* *************************************
*  Constants
***************************************/
/* Duration of one second, expressed in two units.
 * SEC_TO_MICRO is not defined in this file (NOTE(review): presumably provided by timefn.h - confirm). */
#define TIMELOOP_MICROSEC     SEC_TO_MICRO      /* 1 second */
#define TIMELOOP_NANOSEC      (1*1000000000ULL) /* 1 second, in nanoseconds */
/* *************************************
*  Debug errors
***************************************/
/* DEBUGOUTPUT() :
 * prints its printf-style arguments to stderr when DEBUG is defined and >= 1,
 * and expands to nothing otherwise (stdio.h is only pulled in for debug builds). */
#if defined(DEBUG) && (DEBUG >= 1)
#  include <stdio.h>       /* fprintf */
#  define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)
#  define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
#else
#  define DEBUGOUTPUT(...)
#endif


/* RETURN_QUIET_ERROR() :
 * leaves the enclosing function, returning `retValue`.
 * The remaining arguments form a printf-style message,
 * which is only emitted (with file and line) in debug builds : error without displaying.
 * note : expands to a braced compound statement, not a single expression. */
#define RETURN_QUIET_ERROR(retValue, ...) {           \
    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
    DEBUGOUTPUT("Error : ");                          \
    DEBUGOUTPUT(__VA_ARGS__);                         \
    DEBUGOUTPUT(" \n");                               \
    return retValue;                                  \
}
/* *************************************
*  Benchmarking an arbitrary function
***************************************/
62 int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome
)
64 return outcome
.error_tag_never_ever_use_directly
== 0;
67 /* warning : this function will stop program execution if outcome is invalid !
68 * check outcome validity first, using BMK_isValid_runResult() */
69 BMK_runTime_t
BMK_extract_runTime(BMK_runOutcome_t outcome
)
71 assert(outcome
.error_tag_never_ever_use_directly
== 0);
72 return outcome
.internal_never_ever_use_directly
;
75 size_t BMK_extract_errorResult(BMK_runOutcome_t outcome
)
77 assert(outcome
.error_tag_never_ever_use_directly
!= 0);
78 return outcome
.error_result_never_ever_use_directly
;
81 static BMK_runOutcome_t
BMK_runOutcome_error(size_t errorResult
)
84 memset(&b
, 0, sizeof(b
));
85 b
.error_tag_never_ever_use_directly
= 1;
86 b
.error_result_never_ever_use_directly
= errorResult
;
90 static BMK_runOutcome_t
BMK_setValid_runTime(BMK_runTime_t runTime
)
92 BMK_runOutcome_t outcome
;
93 outcome
.error_tag_never_ever_use_directly
= 0;
94 outcome
.internal_never_ever_use_directly
= runTime
;
99 /* initFn will be measured once, benchFn will be measured `nbLoops` times */
100 /* initFn is optional, provide NULL if none */
101 /* benchFn must return a size_t value that errorFn can interpret */
102 /* takes # of blocks and list of size & stuff for each. */
103 /* can report result of benchFn for each block into blockResult. */
104 /* blockResult is optional, provide NULL if this information is not required */
105 /* note : time per loop can be reported as zero if run time < timer resolution */
106 BMK_runOutcome_t
BMK_benchFunction(BMK_benchParams_t p
,
110 nbLoops
+= !nbLoops
; /* minimum nbLoops is 1 */
114 for(i
= 0; i
< p
.blockCount
; i
++) {
115 memset(p
.dstBuffers
[i
], 0xE5, p
.dstCapacities
[i
]); /* warm up and erase result buffer */
119 { UTIL_time_t
const clockStart
= UTIL_getTime();
120 unsigned loopNb
, blockNb
;
121 if (p
.initFn
!= NULL
) p
.initFn(p
.initPayload
);
122 for (loopNb
= 0; loopNb
< nbLoops
; loopNb
++) {
123 for (blockNb
= 0; blockNb
< p
.blockCount
; blockNb
++) {
124 size_t const res
= p
.benchFn(p
.srcBuffers
[blockNb
], p
.srcSizes
[blockNb
],
125 p
.dstBuffers
[blockNb
], p
.dstCapacities
[blockNb
],
128 if (p
.blockResults
!= NULL
) p
.blockResults
[blockNb
] = res
;
129 if ((p
.errorFn
!= NULL
) && (p
.errorFn(res
))) {
130 RETURN_QUIET_ERROR(BMK_runOutcome_error(res
),
131 "Function benchmark failed on block %u (of size %u) with error %i",
132 blockNb
, (unsigned)p
.srcSizes
[blockNb
], (int)res
);
136 } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
138 { PTime
const totalTime
= UTIL_clockSpanNano(clockStart
);
140 rt
.nanoSecPerRun
= (double)totalTime
/ nbLoops
;
141 rt
.sumOfReturn
= dstSize
;
142 return BMK_setValid_runTime(rt
);
147 /* ==== Benchmarking any function, providing intermediate results ==== */
149 struct BMK_timedFnState_s
{
153 BMK_runTime_t fastestRun
;
155 UTIL_time_t coolTime
;
156 }; /* typedef'd to BMK_timedFnState_t within bench.h */
158 BMK_timedFnState_t
* BMK_createTimedFnState(unsigned total_ms
, unsigned run_ms
)
160 BMK_timedFnState_t
* const r
= (BMK_timedFnState_t
*)malloc(sizeof(*r
));
161 if (r
== NULL
) return NULL
; /* malloc() error */
162 BMK_resetTimedFnState(r
, total_ms
, run_ms
);
166 void BMK_freeTimedFnState(BMK_timedFnState_t
* state
) { free(state
); }
169 BMK_initStatic_timedFnState(void* buffer
, size_t size
, unsigned total_ms
, unsigned run_ms
)
171 typedef char check_size
[ 2 * (sizeof(BMK_timedFnState_shell
) >= sizeof(struct BMK_timedFnState_s
)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
172 typedef struct { check_size c
; BMK_timedFnState_t tfs
; } tfs_align
; /* force tfs to be aligned at its next best position */
173 size_t const tfs_alignment
= offsetof(tfs_align
, tfs
); /* provides the minimal alignment restriction for BMK_timedFnState_t */
174 BMK_timedFnState_t
* const r
= (BMK_timedFnState_t
*)buffer
;
175 if (buffer
== NULL
) return NULL
;
176 if (size
< sizeof(struct BMK_timedFnState_s
)) return NULL
;
177 if ((size_t)buffer
% tfs_alignment
) return NULL
; /* buffer must be properly aligned */
178 BMK_resetTimedFnState(r
, total_ms
, run_ms
);
182 void BMK_resetTimedFnState(BMK_timedFnState_t
* timedFnState
, unsigned total_ms
, unsigned run_ms
)
184 if (!total_ms
) total_ms
= 1 ;
185 if (!run_ms
) run_ms
= 1;
186 if (run_ms
> total_ms
) run_ms
= total_ms
;
187 timedFnState
->timeSpent_ns
= 0;
188 timedFnState
->timeBudget_ns
= (PTime
)total_ms
* TIMELOOP_NANOSEC
/ 1000;
189 timedFnState
->runBudget_ns
= (PTime
)run_ms
* TIMELOOP_NANOSEC
/ 1000;
190 timedFnState
->fastestRun
.nanoSecPerRun
= (double)TIMELOOP_NANOSEC
* 2000000000; /* hopefully large enough : must be larger than any potential measurement */
191 timedFnState
->fastestRun
.sumOfReturn
= (size_t)(-1LL);
192 timedFnState
->nbLoops
= 1;
193 timedFnState
->coolTime
= UTIL_getTime();
196 /* Tells if nb of seconds set in timedFnState for all runs is spent.
197 * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
198 int BMK_isCompleted_TimedFn(const BMK_timedFnState_t
* timedFnState
)
200 return (timedFnState
->timeSpent_ns
>= timedFnState
->timeBudget_ns
);
/* MIN() : smallest of 2 values.
 * warning : the selected argument is evaluated twice; only pass side-effect-free expressions. */
#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )

/* half of TIMELOOP_NANOSEC, i.e. 0.5 seconds expressed in nanoseconds */
#define MINUSABLETIME  (TIMELOOP_NANOSEC / 2)  /* 0.5 seconds */
209 BMK_runOutcome_t
BMK_benchTimedFn(BMK_timedFnState_t
* cont
,
212 PTime
const runBudget_ns
= cont
->runBudget_ns
;
213 PTime
const runTimeMin_ns
= runBudget_ns
/ 2;
215 BMK_runTime_t bestRunTime
= cont
->fastestRun
;
218 BMK_runOutcome_t
const runResult
= BMK_benchFunction(p
, cont
->nbLoops
);
220 if(!BMK_isSuccessful_runOutcome(runResult
)) { /* error : move out */
224 { BMK_runTime_t
const newRunTime
= BMK_extract_runTime(runResult
);
225 double const loopDuration_ns
= newRunTime
.nanoSecPerRun
* cont
->nbLoops
;
227 cont
->timeSpent_ns
+= (unsigned long long)loopDuration_ns
;
229 /* estimate nbLoops for next run to last approximately 1 second */
230 if (loopDuration_ns
> (runBudget_ns
/ 50)) {
231 double const fastestRun_ns
= MIN(bestRunTime
.nanoSecPerRun
, newRunTime
.nanoSecPerRun
);
232 cont
->nbLoops
= (unsigned)(runBudget_ns
/ fastestRun_ns
) + 1;
234 /* previous run was too short : blindly increase workload by x multiplier */
235 const unsigned multiplier
= 10;
236 assert(cont
->nbLoops
< ((unsigned)-1) / multiplier
); /* avoid overflow */
237 cont
->nbLoops
*= multiplier
;
240 if(loopDuration_ns
< runTimeMin_ns
) {
241 /* don't report results for which benchmark run time was too small : increased risks of rounding errors */
242 assert(completed
== 0);
245 if(newRunTime
.nanoSecPerRun
< bestRunTime
.nanoSecPerRun
) {
246 bestRunTime
= newRunTime
;
251 } /* while (!completed) */
253 return BMK_setValid_runTime(bestRunTime
);