ceph/src/zstd/programs/benchfn.c

   1 /*
   2  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
   3  * All rights reserved.
   4  *
   5  * This source code is licensed under both the BSD-style license (found in the
   6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
   7  * in the COPYING file in the root directory of this source tree).
   8  * You may select, at your option, one of the above-listed licenses.
   9  */
  10
  11
  12
  13 /* *************************************
  14 *  Includes
  15 ***************************************/
  16 #include <stdlib.h>      /* malloc, free */
  17 #include <string.h>      /* memset */
  18 #undef NDEBUG            /* assert must not be disabled */
  19 #include <assert.h>      /* assert */
  20
  21 #include "timefn.h"        /* UTIL_time_t, UTIL_getTime */
  22 #include "benchfn.h"
  23
  24
  25 /* *************************************
  26 *  Constants
  27 ***************************************/
  28 #define TIMELOOP_MICROSEC     SEC_TO_MICRO      /* 1 second */
  29 #define TIMELOOP_NANOSEC      (1*1000000000ULL) /* 1 second */
  30
  31 #define KB *(1 <<10)
  32 #define MB *(1 <<20)
  33 #define GB *(1U<<30)
  34
  35
  36 /* *************************************
  37 *  Debug errors
  38 ***************************************/
  39 #if defined(DEBUG) && (DEBUG >= 1)
  40 #  include <stdio.h>       /* fprintf */
  41 #  define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)
  42 #  define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
  43 #else
  44 #  define DEBUGOUTPUT(...)
  45 #endif
  46
  47
  48 /* error without displaying */
  49 #define RETURN_QUIET_ERROR(retValue, ...) {           \
  50     DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
  51     DEBUGOUTPUT("Error : ");                          \
  52     DEBUGOUTPUT(__VA_ARGS__);                         \
  53     DEBUGOUTPUT(" \n");                               \
  54     return retValue;                                  \
  55 }
  56
  57
  58 /* *************************************
  59 *  Benchmarking an arbitrary function
  60 ***************************************/
  61
  62 int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
  63 {
  64     return outcome.error_tag_never_ever_use_directly == 0;
  65 }
  66
  67 /* warning : this function will stop program execution if outcome is invalid !
  68  *           check outcome validity first, using BMK_isValid_runResult() */
  69 BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
  70 {
  71     assert(outcome.error_tag_never_ever_use_directly == 0);
  72     return outcome.internal_never_ever_use_directly;
  73 }
  74
  75 size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
  76 {
  77     assert(outcome.error_tag_never_ever_use_directly != 0);
  78     return outcome.error_result_never_ever_use_directly;
  79 }
  80
  81 static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
  82 {
  83     BMK_runOutcome_t b;
  84     memset(&b, 0, sizeof(b));
  85     b.error_tag_never_ever_use_directly = 1;
  86     b.error_result_never_ever_use_directly = errorResult;
  87     return b;
  88 }
  89
  90 static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
  91 {
  92     BMK_runOutcome_t outcome;
  93     outcome.error_tag_never_ever_use_directly = 0;
  94     outcome.internal_never_ever_use_directly = runTime;
  95     return outcome;
  96 }
  97
  98
  99 /* initFn will be measured once, benchFn will be measured `nbLoops` times */
 100 /* initFn is optional, provide NULL if none */
 101 /* benchFn must return a size_t value that errorFn can interpret */
 102 /* takes # of blocks and list of size & stuff for each. */
 103 /* can report result of benchFn for each block into blockResult. */
 104 /* blockResult is optional, provide NULL if this information is not required */
 105 /* note : time per loop can be reported as zero if run time < timer resolution */
 106 BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
 107                                    unsigned nbLoops)
 108 {
 109     size_t dstSize = 0;
 110     nbLoops += !nbLoops;   /* minimum nbLoops is 1 */
 111
 112     /* init */
 113     {   size_t i;
 114         for(i = 0; i < p.blockCount; i++) {
 115             memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]);  /* warm up and erase result buffer */
 116     }   }
 117
 118     /* benchmark */
 119     {   UTIL_time_t const clockStart = UTIL_getTime();
 120         unsigned loopNb, blockNb;
 121         if (p.initFn != NULL) p.initFn(p.initPayload);
 122         for (loopNb = 0; loopNb < nbLoops; loopNb++) {
 123             for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
 124                 size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
 125                                    p.dstBuffers[blockNb], p.dstCapacities[blockNb],
 126                                    p.benchPayload);
 127                 if (loopNb == 0) {
 128                     if (p.blockResults != NULL) p.blockResults[blockNb] = res;
 129                     if ((p.errorFn != NULL) && (p.errorFn(res))) {
 130                         RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
 131                             "Function benchmark failed on block %u (of size %u) with error %i",
 132                             blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
 133                     }
 134                     dstSize += res;
 135             }   }
 136         }  /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
 137
 138         {   PTime const totalTime = UTIL_clockSpanNano(clockStart);
 139             BMK_runTime_t rt;
 140             rt.nanoSecPerRun = (double)totalTime / nbLoops;
 141             rt.sumOfReturn = dstSize;
 142             return BMK_setValid_runTime(rt);
 143     }   }
 144 }
 145
 146
 147 /* ====  Benchmarking any function, providing intermediate results  ==== */
 148
 149 struct BMK_timedFnState_s {
 150     PTime timeSpent_ns;
 151     PTime timeBudget_ns;
 152     PTime runBudget_ns;
 153     BMK_runTime_t fastestRun;
 154     unsigned nbLoops;
 155     UTIL_time_t coolTime;
 156 };  /* typedef'd to BMK_timedFnState_t within bench.h */
 157
 158 BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
 159 {
 160     BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
 161     if (r == NULL) return NULL;   /* malloc() error */
 162     BMK_resetTimedFnState(r, total_ms, run_ms);
 163     return r;
 164 }
 165
 166 void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); }
 167
 168 BMK_timedFnState_t*
 169 BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
 170 {
 171     typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1];  /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
 172     typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align;  /* force tfs to be aligned at its next best position */
 173     size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
 174     BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
 175     if (buffer == NULL) return NULL;
 176     if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
 177     if ((size_t)buffer % tfs_alignment) return NULL;  /* buffer must be properly aligned */
 178     BMK_resetTimedFnState(r, total_ms, run_ms);
 179     return r;
 180 }
 181
 182 void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
 183 {
 184     if (!total_ms) total_ms = 1 ;
 185     if (!run_ms) run_ms = 1;
 186     if (run_ms > total_ms) run_ms = total_ms;
 187     timedFnState->timeSpent_ns = 0;
 188     timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000;
 189     timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000;
 190     timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000;  /* hopefully large enough : must be larger than any potential measurement */
 191     timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
 192     timedFnState->nbLoops = 1;
 193     timedFnState->coolTime = UTIL_getTime();
 194 }
 195
 196 /* Tells if nb of seconds set in timedFnState for all runs is spent.
 197  * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
 198 int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
 199 {
 200     return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
 201 }
 202
 203
 204 #undef MIN
 205 #define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
 206
 207 #define MINUSABLETIME  (TIMELOOP_NANOSEC / 2)  /* 0.5 seconds */
 208
 209 BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
 210                                   BMK_benchParams_t p)
 211 {
 212     PTime const runBudget_ns = cont->runBudget_ns;
 213     PTime const runTimeMin_ns = runBudget_ns / 2;
 214     int completed = 0;
 215     BMK_runTime_t bestRunTime = cont->fastestRun;
 216
 217     while (!completed) {
 218         BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);
 219
 220         if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
 221             return runResult;
 222         }
 223
 224         {   BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
 225             double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
 226
 227             cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
 228
 229             /* estimate nbLoops for next run to last approximately 1 second */
 230             if (loopDuration_ns > (runBudget_ns / 50)) {
 231                 double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
 232                 cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1;
 233             } else {
 234                 /* previous run was too short : blindly increase workload by x multiplier */
 235                 const unsigned multiplier = 10;
 236                 assert(cont->nbLoops < ((unsigned)-1) / multiplier);  /* avoid overflow */
 237                 cont->nbLoops *= multiplier;
 238             }
 239
 240             if(loopDuration_ns < runTimeMin_ns) {
 241                 /* don't report results for which benchmark run time was too small : increased risks of rounding errors */
 242                 assert(completed == 0);
 243                 continue;
 244             } else {
 245                 if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
 246                     bestRunTime = newRunTime;
 247                 }
 248                 completed = 1;
 249             }
 250         }
 251     }   /* while (!completed) */
 252
 253     return BMK_setValid_runTime(bestRunTime);
 254 }