]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright David Abrahams, Matthias Troyer, Michael Gauckler |
2 | // 2005. Distributed under the Boost Software License, Version | |
3 | // 1.0. (See accompanying file LICENSE_1_0.txt or copy at | |
4 | // http://www.boost.org/LICENSE_1_0.txt) | |
5 | ||
6 | #include <boost/parameter.hpp> | |
7 | #include <boost/timer.hpp> | |
8 | #include <iostream> | |
9 | ||
10 | namespace test | |
11 | { | |
12 | // | |
13 | // This test measures the abstraction overhead of using the named | |
14 | // parameter interface. Some actual test results have been recorded | |
15 | // in timings.txt in this source file's directory, or | |
16 | // http://www.boost.org/libs/parameter/test/timings.txt. | |
17 | // | |
18 | // Caveats: | |
19 | // | |
20 | // 1. This test penalizes the named parameter library slightly, by | |
21 | // passing two arguments through the named interface, while | |
22 | // only passing one through the plain C++ interface. | |
23 | // | |
24 | // 2. This test does not measure the case where an ArgumentPack is | |
25 | // so large that it doesn't fit in the L1 cache. | |
26 | // | |
27 | // 3. Although we've tried to make this test as general as | |
28 | // possible, we are targeting it at a specific application. | |
29 | // Where that affects design decisions, we've noted it below in | |
30 | // ***...***. | |
31 | // | |
32 | // 4. The first time you run this program, the time may not be | |
33 | // representative because of disk and memory cache effects, so | |
34 | // always run it multiple times and ignore the first | |
35 | // measurement. This approach will also allow you to estimate | |
36 | // the statistical error of your test by observing the | |
37 | // variation in the valid times. | |
38 | // | |
39 | // 5. Try to run this program on a machine that's otherwise idle, | |
40 | // or other processes and even device hardware interrupts may | |
41 | // interfere by causing caches to be flushed. | |
42 | ||
43 | // Accumulator function object with plain C++ interface | |
44 | template <class T> | |
45 | struct plain_weight_running_total | |
46 | { | |
47 | plain_weight_running_total() | |
48 | #if BOOST_WORKAROUND(BOOST_MSVC, < 1300) | |
49 | : sum(T()) | |
50 | #else | |
51 | : sum() | |
52 | #endif | |
53 | {} | |
54 | ||
55 | void operator()(T w) | |
56 | { | |
57 | this->sum += w; | |
58 | } | |
59 | ||
60 | T sum; | |
61 | }; | |
62 | ||
63 | BOOST_PARAMETER_KEYWORD(tag, weight) | |
64 | BOOST_PARAMETER_KEYWORD(tag, value) | |
65 | ||
66 | // Accumulator function object with named parameter interface | |
67 | template <class T> | |
68 | struct named_param_weight_running_total | |
69 | { | |
70 | named_param_weight_running_total() | |
71 | #if BOOST_WORKAROUND(BOOST_MSVC, < 1300) | |
72 | : sum(T()) | |
73 | #else | |
74 | : sum() | |
75 | #endif | |
76 | {} | |
77 | ||
78 | template <class ArgumentPack> | |
79 | void operator()(ArgumentPack const& variates) | |
80 | { | |
81 | this->sum += variates[weight]; | |
82 | } | |
83 | ||
84 | T sum; | |
85 | }; | |
86 | ||
87 | // This value is required to ensure that a smart compiler's dead | |
88 | // code elimination doesn't optimize away anything we're testing. | |
89 | // We'll use it to compute the return code of the executable to make | |
90 | // sure it's needed. | |
91 | double live_code; | |
92 | ||
93 | // Call objects of the given Accumulator type repeatedly with x as | |
94 | // an argument. | |
95 | template <class Accumulator, class Arg> | |
96 | void hammer(Arg const& x, long const repeats) | |
97 | { | |
98 | // Strategy: because the sum in an accumulator after each call | |
99 | // depends on the previous value of the sum, the CPU's pipeline | |
100 | // might be stalled while waiting for the previous addition to | |
101 | // complete. Therefore, we allocate an array of accumulators, | |
102 | // and update them in sequence, so that there's no dependency | |
103 | // between adjacent addition operations. | |
104 | // | |
105 | // Additionally, if there were only one accumulator, the | |
106 | // compiler or CPU might decide to update the value in a | |
107 | // register rather that writing it back to memory. we want each | |
108 | // operation to at least update the L1 cache. *** Note: This | |
109 | // concern is specific to the particular application at which | |
110 | // we're targeting the test. *** | |
111 | ||
112 | // This has to be at least as large as the number of | |
113 | // simultaneous accumulations that can be executing in the | |
114 | // compiler pipeline. A safe number here is larger than the | |
115 | // machine's maximum pipeline depth. If you want to test the L2 | |
116 | // or L3 cache, or main memory, you can increase the size of | |
117 | // this array. 1024 is an upper limit on the pipeline depth of | |
118 | // current vector machines. | |
119 | const std::size_t number_of_accumulators = 1024; | |
120 | ||
121 | Accumulator a[number_of_accumulators]; | |
122 | ||
123 | for (long iteration = 0; iteration < repeats; ++iteration) | |
124 | { | |
125 | for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap) | |
126 | { | |
127 | (*ap)(x); | |
128 | } | |
129 | } | |
130 | ||
131 | // Accumulate all the partial sums to avoid dead code | |
132 | // elimination. | |
133 | for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap) | |
134 | { | |
135 | live_code += ap->sum; | |
136 | } | |
137 | } | |
138 | ||
139 | // Measure the time required to hammer accumulators of the given | |
140 | // type with the argument x. | |
141 | template <class Accumulator, class T> | |
142 | double measure(T const& x, long const repeats) | |
143 | { | |
144 | // Hammer accumulators a couple of times to ensure the | |
145 | // instruction cache is full of our test code, and that we don't | |
146 | // measure the cost of a page fault for accessing the data page | |
147 | // containing the memory where the accumulators will be | |
148 | // allocated | |
149 | hammer<Accumulator>(x, repeats); | |
150 | hammer<Accumulator>(x, repeats); | |
151 | ||
152 | // Now start a timer | |
153 | boost::timer time; | |
154 | hammer<Accumulator>(x, repeats); // This time, we'll measure | |
155 | return time.elapsed(); | |
156 | } | |
157 | } | |
158 | ||
159 | int main() | |
160 | { | |
161 | using namespace test; | |
162 | ||
163 | // first decide how many repetitions to measure | |
164 | long repeats = 100; | |
165 | double measured = 0; | |
166 | while (measured < 1.0 && repeats <= 10000000) | |
167 | { | |
168 | repeats *= 10; | |
169 | ||
170 | boost::timer time; | |
171 | ||
172 | hammer<plain_weight_running_total<double> >(.1, repeats); | |
173 | hammer<named_param_weight_running_total<double> >( | |
174 | (weight = .1, value = .2), repeats); | |
175 | ||
176 | measured = time.elapsed(); | |
177 | } | |
178 | ||
179 | std::cout | |
180 | << "plain time: " | |
181 | << measure<plain_weight_running_total<double> >(.1, repeats) | |
182 | << std::endl; | |
183 | ||
184 | std::cout | |
185 | << "named parameter time: " | |
186 | << measure<named_param_weight_running_total<double> >( | |
187 | (weight = .1, value = .2), repeats | |
188 | ) | |
189 | << std::endl; | |
190 | ||
191 | // This is ultimately responsible for preventing all the test code | |
192 | // from being optimized away. Change this to return 0 and you | |
193 | // unplug the whole test's life support system. | |
194 | return live_code < 0.; | |
195 | } |