ceph/src/boost/libs/parameter/test/efficiency.cpp

   1 // Copyright David Abrahams, Matthias Troyer, Michael Gauckler
   2 // 2005. Distributed under the Boost Software License, Version
   3 // 1.0. (See accompanying file LICENSE_1_0.txt or copy at
   4 // http://www.boost.org/LICENSE_1_0.txt)
   5
   6 #include <boost/parameter.hpp>
   7 #include <boost/timer.hpp>
   8 #include <iostream>
   9
  10 namespace test
  11 {
  12   //
  13   // This test measures the abstraction overhead of using the named
  14   // parameter interface.  Some actual test results have been recorded
  15   // in timings.txt in this source file's directory, or
  16   // http://www.boost.org/libs/parameter/test/timings.txt.
  17   //
  18   // Caveats:
  19   //
  20   //   1. This test penalizes the named parameter library slightly, by
  21   //      passing two arguments through the named interface, while
  22   //      only passing one through the plain C++ interface.
  23   //
  24   //   2. This test does not measure the case where an ArgumentPack is
  25   //      so large that it doesn't fit in the L1 cache.
  26   //
  27   //   3. Although we've tried to make this test as general as
  28   //      possible, we are targeting it at a specific application.
  29   //      Where that affects design decisions, we've noted it below in
  30   //      ***...***.
  31   //
  32   //   4. The first time you run this program, the time may not be
  33   //      representative because of disk and memory cache effects, so
  34   //      always run it multiple times and ignore the first
  35   //      measurement.  This approach will also allow you to estimate
  36   //      the statistical error of your test by observing the
  37   //      variation in the valid times.
  38   //
  39   //   5. Try to run this program on a machine that's otherwise idle,
  40   //      or other processes and even device hardware interrupts may
  41   //      interfere by causing caches to be flushed.
  42
  43   // Accumulator function object with plain C++ interface
  44   template <class T>
  45   struct plain_weight_running_total
  46   {
  47       plain_weight_running_total()
  48 #if BOOST_WORKAROUND(BOOST_MSVC, < 1300)
  49         : sum(T())
  50 #else
  51         : sum()
  52 #endif
  53       {}
  54
  55       void operator()(T w)
  56       {
  57           this->sum += w;
  58       }
  59
  60       T sum;
  61   };
  62
  63   BOOST_PARAMETER_KEYWORD(tag, weight)
  64   BOOST_PARAMETER_KEYWORD(tag, value)
  65
  66   // Accumulator function object with named parameter interface
  67   template <class T>
  68   struct named_param_weight_running_total
  69   {
  70       named_param_weight_running_total()
  71 #if BOOST_WORKAROUND(BOOST_MSVC, < 1300)
  72         : sum(T())
  73 #else
  74         : sum()
  75 #endif
  76       {}
  77
  78       template <class ArgumentPack>
  79       void operator()(ArgumentPack const& variates)
  80       {
  81           this->sum += variates[weight];
  82       }
  83
  84       T sum;
  85   };
  86
  87   // This value is required to ensure that a smart compiler's dead
  88   // code elimination doesn't optimize away anything we're testing.
  89   // We'll use it to compute the return code of the executable to make
  90   // sure it's needed.
  91   double live_code;
  92
  93   // Call objects of the given Accumulator type repeatedly with x as
  94   // an argument.
  95   template <class Accumulator, class Arg>
  96   void hammer(Arg const& x, long const repeats)
  97   {
  98       // Strategy: because the sum in an accumulator after each call
  99       // depends on the previous value of the sum, the CPU's pipeline
 100       // might be stalled while waiting for the previous addition to
 101       // complete.  Therefore, we allocate an array of accumulators,
 102       // and update them in sequence, so that there's no dependency
 103       // between adjacent addition operations.
 104       //
 105       // Additionally, if there were only one accumulator, the
 106       // compiler or CPU might decide to update the value in a
 107       // register rather that writing it back to memory.  we want each
 108       // operation to at least update the L1 cache.  *** Note: This
 109       // concern is specific to the particular application at which
 110       // we're targeting the test. ***
 111
 112       // This has to be at least as large as the number of
 113       // simultaneous accumulations that can be executing in the
 114       // compiler pipeline.  A safe number here is larger than the
 115       // machine's maximum pipeline depth. If you want to test the L2
 116       // or L3 cache, or main memory, you can increase the size of
 117       // this array.  1024 is an upper limit on the pipeline depth of
 118       // current vector machines.
 119       const std::size_t number_of_accumulators = 1024;
 120
 121       Accumulator a[number_of_accumulators];
 122
 123       for (long iteration = 0; iteration < repeats; ++iteration)
 124       {
 125           for (Accumulator* ap = a;  ap < a + number_of_accumulators; ++ap)
 126           {
 127               (*ap)(x);
 128           }
 129       }
 130
 131       // Accumulate all the partial sums to avoid dead code
 132       // elimination.
 133       for (Accumulator* ap = a;  ap < a + number_of_accumulators; ++ap)
 134       {
 135           live_code += ap->sum;
 136       }
 137   }
 138
 139   // Measure the time required to hammer accumulators of the given
 140   // type with the argument x.
 141   template <class Accumulator, class T>
 142   double measure(T const& x, long const repeats)
 143   {
 144       // Hammer accumulators a couple of times to ensure the
 145       // instruction cache is full of our test code, and that we don't
 146       // measure the cost of a page fault for accessing the data page
 147       // containing the memory where the accumulators will be
 148       // allocated
 149       hammer<Accumulator>(x, repeats);
 150       hammer<Accumulator>(x, repeats);
 151
 152       // Now start a timer
 153       boost::timer time;
 154       hammer<Accumulator>(x, repeats);  // This time, we'll measure
 155       return time.elapsed();
 156   }
 157 }
 158
 159 int main()
 160 {
 161     using namespace test;
 162
 163     // first decide how many repetitions to measure
 164     long repeats = 100;
 165     double measured = 0;
 166     while (measured < 1.0 && repeats <= 10000000)
 167     {
 168         repeats *= 10;
 169
 170         boost::timer time;
 171
 172         hammer<plain_weight_running_total<double> >(.1, repeats);
 173         hammer<named_param_weight_running_total<double> >(
 174             (weight = .1, value = .2), repeats);
 175
 176         measured = time.elapsed();
 177     }
 178
 179     std::cout
 180         << "plain time:           "
 181         << measure<plain_weight_running_total<double> >(.1, repeats)
 182         << std::endl;
 183
 184     std::cout
 185         << "named parameter time: "
 186         << measure<named_param_weight_running_total<double> >(
 187             (weight = .1, value = .2), repeats
 188         )
 189         << std::endl;
 190
 191     // This is ultimately responsible for preventing all the test code
 192     // from being optimized away.  Change this to return 0 and you
 193     // unplug the whole test's life support system.
 194     return live_code < 0.;
 195 }