[ceph.git] / ceph / src / boost / libs / parameter / test / efficiency.cpp

// Copyright David Abrahams, Matthias Troyer, Michael Gauckler
// 2005. Distributed under the Boost Software License, Version
// 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

#include <boost/parameter.hpp>
#include <boost/timer.hpp>
#include <iostream>

namespace test
{
  //
  // This test measures the abstraction overhead of using the named
  // parameter interface.  Some actual test results have been recorded
  // in timings.txt in this source file's directory, or
  // http://www.boost.org/libs/parameter/test/timings.txt.
  //
  // Caveats:
  //
  //   1. This test penalizes the named parameter library slightly, by
  //      passing two arguments through the named interface, while
  //      only passing one through the plain C++ interface.
  //
  //   2. This test does not measure the case where an ArgumentPack is
  //      so large that it doesn't fit in the L1 cache.
  //
  //   3. Although we've tried to make this test as general as
  //      possible, we are targeting it at a specific application.
  //      Where that affects design decisions, we've noted it below in
  //      ***...***.
  //
  //   4. The first time you run this program, the time may not be
  //      representative because of disk and memory cache effects, so
  //      always run it multiple times and ignore the first
  //      measurement.  This approach will also allow you to estimate
  //      the statistical error of your test by observing the
  //      variation in the valid times.
  //
  //   5. Try to run this program on a machine that's otherwise idle,
  //      or other processes and even device hardware interrupts may
  //      interfere by causing caches to be flushed.
  
  // Accumulator function object with plain C++ interface 
  template <class T>
  struct plain_weight_running_total
  {
      plain_weight_running_total()
#if BOOST_WORKAROUND(BOOST_MSVC, < 1300)
        : sum(T())
#else
        : sum()
#endif 
      {}
      
      void operator()(T w)
      {
          this->sum += w;
      }

      T sum;
  };

  BOOST_PARAMETER_KEYWORD(tag, weight)
  BOOST_PARAMETER_KEYWORD(tag, value)
      
  // Accumulator function object with named parameter interface
  template <class T>
  struct named_param_weight_running_total
  {
      named_param_weight_running_total()
#if BOOST_WORKAROUND(BOOST_MSVC, < 1300)
        : sum(T())
#else
        : sum()
#endif 
      {}

      template <class ArgumentPack>
      void operator()(ArgumentPack const& variates)
      {
          this->sum += variates[weight];
      }

      T sum;
  };

  // This value is required to ensure that a smart compiler's dead
  // code elimination doesn't optimize away anything we're testing.
  // We'll use it to compute the return code of the executable to make
  // sure it's needed.
  double live_code;

  // Call objects of the given Accumulator type repeatedly with x as
  // an argument.
  template <class Accumulator, class Arg>
  void hammer(Arg const& x, long const repeats)
  {
      // Strategy: because the sum in an accumulator after each call
      // depends on the previous value of the sum, the CPU's pipeline
      // might be stalled while waiting for the previous addition to
      // complete.  Therefore, we allocate an array of accumulators,
      // and update them in sequence, so that there's no dependency
      // between adjacent addition operations.
      //
      // Additionally, if there were only one accumulator, the
      // compiler or CPU might decide to update the value in a
      // register rather that writing it back to memory.  we want each
      // operation to at least update the L1 cache.  *** Note: This
      // concern is specific to the particular application at which
      // we're targeting the test. ***

      // This has to be at least as large as the number of
      // simultaneous accumulations that can be executing in the
      // compiler pipeline.  A safe number here is larger than the
      // machine's maximum pipeline depth. If you want to test the L2
      // or L3 cache, or main memory, you can increase the size of
      // this array.  1024 is an upper limit on the pipeline depth of
      // current vector machines.
      const std::size_t number_of_accumulators = 1024;
      
      Accumulator a[number_of_accumulators];
      
      for (long iteration = 0; iteration < repeats; ++iteration)
      {
          for (Accumulator* ap = a;  ap < a + number_of_accumulators; ++ap)
          {
              (*ap)(x);
          }
      }

      // Accumulate all the partial sums to avoid dead code
      // elimination.
      for (Accumulator* ap = a;  ap < a + number_of_accumulators; ++ap)
      {
          live_code += ap->sum;
      }
  }

  // Measure the time required to hammer accumulators of the given
  // type with the argument x.
  template <class Accumulator, class T>
  double measure(T const& x, long const repeats)
  {
      // Hammer accumulators a couple of times to ensure the
      // instruction cache is full of our test code, and that we don't
      // measure the cost of a page fault for accessing the data page
      // containing the memory where the accumulators will be
      // allocated
      hammer<Accumulator>(x, repeats);
      hammer<Accumulator>(x, repeats);

      // Now start a timer
      boost::timer time;
      hammer<Accumulator>(x, repeats);  // This time, we'll measure
      return time.elapsed();
  }
}

int main()
{
    using namespace test;

    // first decide how many repetitions to measure
    long repeats = 100;
    double measured = 0;
    while (measured < 1.0 && repeats <= 10000000)
    {
        repeats *= 10;
        
        boost::timer time;

        hammer<plain_weight_running_total<double> >(.1, repeats);
        hammer<named_param_weight_running_total<double> >(
            (weight = .1, value = .2), repeats);

        measured = time.elapsed();
    }
    
    std::cout
        << "plain time:           "
        << measure<plain_weight_running_total<double> >(.1, repeats)
        << std::endl;
    
    std::cout
        << "named parameter time: "
        << measure<named_param_weight_running_total<double> >(
            (weight = .1, value = .2), repeats
        )
        << std::endl;

    // This is ultimately responsible for preventing all the test code
    // from being optimized away.  Change this to return 0 and you
    // unplug the whole test's life support system.
    return live_code < 0.;
}
Commit	Line	Data
7c673cae FG	1	// Copyright David Abrahams, Matthias Troyer, Michael Gauckler
	2	// 2005. Distributed under the Boost Software License, Version
	3	// 1.0. (See accompanying file LICENSE_1_0.txt or copy at
	4	// http://www.boost.org/LICENSE_1_0.txt)
	5
	6	#include <boost/parameter.hpp>
	7	#include <boost/timer.hpp>
	8	#include <iostream>
	9
	10	namespace test
	11	{
	12	//
	13	// This test measures the abstraction overhead of using the named
	14	// parameter interface. Some actual test results have been recorded
	15	// in timings.txt in this source file's directory, or
	16	// http://www.boost.org/libs/parameter/test/timings.txt.
	17	//
	18	// Caveats:
	19	//
	20	// 1. This test penalizes the named parameter library slightly, by
	21	// passing two arguments through the named interface, while
	22	// only passing one through the plain C++ interface.
	23	//
	24	// 2. This test does not measure the case where an ArgumentPack is
	25	// so large that it doesn't fit in the L1 cache.
	26	//
	27	// 3. Although we've tried to make this test as general as
	28	// possible, we are targeting it at a specific application.
	29	// Where that affects design decisions, we've noted it below in
	30	// *...*.
	31	//
	32	// 4. The first time you run this program, the time may not be
	33	// representative because of disk and memory cache effects, so
	34	// always run it multiple times and ignore the first
	35	// measurement. This approach will also allow you to estimate
	36	// the statistical error of your test by observing the
	37	// variation in the valid times.
	38	//
	39	// 5. Try to run this program on a machine that's otherwise idle,
	40	// or other processes and even device hardware interrupts may
	41	// interfere by causing caches to be flushed.
	42
	43	// Accumulator function object with plain C++ interface
	44	template <class T>
	45	struct plain_weight_running_total
	46	{
	47	plain_weight_running_total()
	48	#if BOOST_WORKAROUND(BOOST_MSVC, < 1300)
	49	: sum(T())
	50	#else
	51	: sum()
	52	#endif
	53	{}
	54
	55	void operator()(T w)
	56	{
	57	this->sum += w;
	58	}
	59
	60	T sum;
	61	};
	62
	63	BOOST_PARAMETER_KEYWORD(tag, weight)
	64	BOOST_PARAMETER_KEYWORD(tag, value)
65
66	// Accumulator function object with named parameter interface
67	template <class T>
68	struct named_param_weight_running_total
69	{
70	named_param_weight_running_total()
71	#if BOOST_WORKAROUND(BOOST_MSVC, < 1300)
72	: sum(T())
73	#else
74	: sum()
75	#endif
76	{}
77
78	template <class ArgumentPack>
79	void operator()(ArgumentPack const& variates)
80	{
81	this->sum += variates[weight];
82	}
83
84	T sum;
85	};
86
87	// This value is required to ensure that a smart compiler's dead
88	// code elimination doesn't optimize away anything we're testing.
89	// We'll use it to compute the return code of the executable to make
90	// sure it's needed.
91	double live_code;
92
93	// Call objects of the given Accumulator type repeatedly with x as
94	// an argument.
95	template <class Accumulator, class Arg>
96	void hammer(Arg const& x, long const repeats)
97	{
98	// Strategy: because the sum in an accumulator after each call
99	// depends on the previous value of the sum, the CPU's pipeline
100	// might be stalled while waiting for the previous addition to
101	// complete. Therefore, we allocate an array of accumulators,
102	// and update them in sequence, so that there's no dependency
103	// between adjacent addition operations.
104	//
105	// Additionally, if there were only one accumulator, the
106	// compiler or CPU might decide to update the value in a
107	// register rather that writing it back to memory. we want each
108	// operation to at least update the L1 cache. *** Note: This
109	// concern is specific to the particular application at which
110	// we're targeting the test. ***
111
112	// This has to be at least as large as the number of
113	// simultaneous accumulations that can be executing in the
114	// compiler pipeline. A safe number here is larger than the
115	// machine's maximum pipeline depth. If you want to test the L2
116	// or L3 cache, or main memory, you can increase the size of
117	// this array. 1024 is an upper limit on the pipeline depth of
118	// current vector machines.
119	const std::size_t number_of_accumulators = 1024;
120
121	Accumulator a[number_of_accumulators];
122
123	for (long iteration = 0; iteration < repeats; ++iteration)
124	{
125	for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
126	{
127	(*ap)(x);
128	}
129	}
130
131	// Accumulate all the partial sums to avoid dead code
132	// elimination.
133	for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
134	{
135	live_code += ap->sum;
136	}
137	}
138
139	// Measure the time required to hammer accumulators of the given
140	// type with the argument x.
141	template <class Accumulator, class T>
142	double measure(T const& x, long const repeats)
143	{
144	// Hammer accumulators a couple of times to ensure the
145	// instruction cache is full of our test code, and that we don't
146	// measure the cost of a page fault for accessing the data page
147	// containing the memory where the accumulators will be
148	// allocated
149	hammer<Accumulator>(x, repeats);
150	hammer<Accumulator>(x, repeats);
151
152	// Now start a timer
153	boost::timer time;
154	hammer<Accumulator>(x, repeats); // This time, we'll measure
155	return time.elapsed();
156	}
157	}
158
159	int main()
160	{
161	using namespace test;
162
163	// first decide how many repetitions to measure
164	long repeats = 100;
165	double measured = 0;
166	while (measured < 1.0 && repeats <= 10000000)
167	{
168	repeats *= 10;
169
170	boost::timer time;
171
172	hammer<plain_weight_running_total<double> >(.1, repeats);
173	hammer<named_param_weight_running_total<double> >(
174	(weight = .1, value = .2), repeats);
175
176	measured = time.elapsed();
177	}
178
179	std::cout
180	<< "plain time: "
181	<< measure<plain_weight_running_total<double> >(.1, repeats)
182	<< std::endl;
183
184	std::cout
185	<< "named parameter time: "
186	<< measure<named_param_weight_running_total<double> >(
187	(weight = .1, value = .2), repeats
188	)
189	<< std::endl;
190
191	// This is ultimately responsible for preventing all the test code
192	// from being optimized away. Change this to return 0 and you
193	// unplug the whole test's life support system.
194	return live_code < 0.;
195	}