]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Boost.Units - A C++ library for zero-overhead dimensional analysis and |
2 | // unit/quantity manipulation and conversion | |
3 | // | |
4 | // Copyright (C) 2003-2008 Matthias Christian Schabel | |
5 | // Copyright (C) 2008 Steven Watanabe | |
6 | // | |
7 | // Distributed under the Boost Software License, Version 1.0. (See | |
8 | // accompanying file LICENSE_1_0.txt or copy at | |
9 | // http://www.boost.org/LICENSE_1_0.txt) | |
10 | ||
11 | /** | |
12 | \file | |
13 | ||
14 | \brief performance.cpp | |
15 | ||
16 | \details | |
17 | Test runtime performance. | |
18 | ||
19 | Output: | |
20 | @verbatim | |
21 | ||
22 | multiplying ublas::matrix<double>(1000, 1000) : 25.03 seconds | |
23 | multiplying ublas::matrix<quantity>(1000, 1000) : 24.49 seconds | |
24 | tiled_matrix_multiply<double>(1000, 1000) : 1.12 seconds | |
25 | tiled_matrix_multiply<quantity>(1000, 1000) : 1.16 seconds | |
26 | solving y' = 1 - x + 4 * y with double: 1.97 seconds | |
27 | solving y' = 1 - x + 4 * y with quantity: 1.84 seconds | |
28 | ||
29 | @endverbatim | |
30 | **/ | |
31 | ||
32 | #define _SCL_SECURE_NO_WARNINGS | |
33 | ||
34 | #include <cstdlib> | |
35 | #include <ctime> | |
36 | #include <algorithm> | |
37 | #include <iostream> | |
38 | #include <iomanip> | |
39 | ||
40 | #include <boost/config.hpp> | |
41 | #include <boost/timer.hpp> | |
42 | #include <boost/utility/result_of.hpp> | |
43 | ||
44 | #ifdef BOOST_MSVC | |
45 | #pragma warning(push) | |
46 | #pragma warning(disable:4267; disable:4127; disable:4244; disable:4100) | |
47 | #endif | |
48 | ||
49 | #include <boost/numeric/ublas/matrix.hpp> | |
50 | ||
51 | #ifdef BOOST_MSVC | |
52 | #pragma warning(pop) | |
53 | #endif | |
54 | ||
55 | #include <boost/units/quantity.hpp> | |
56 | #include <boost/units/systems/si.hpp> | |
57 | #include <boost/units/cmath.hpp> | |
58 | #include <boost/units/io.hpp> | |
59 | ||
60 | enum { | |
61 | tile_block_size = 24 | |
62 | }; | |
63 | ||
64 | template<class T0, class T1, class Out> | |
65 | void tiled_multiply_carray_inner(T0* first, | |
66 | T1* second, | |
67 | Out* out, | |
68 | int totalwidth, | |
69 | int width2, | |
70 | int height1, | |
71 | int common) { | |
72 | for(int j = 0; j < height1; ++j) { | |
73 | for(int i = 0; i < width2; ++i) { | |
74 | Out value = out[j * totalwidth + i]; | |
75 | for(int k = 0; k < common; ++k) { | |
76 | value += first[k + totalwidth * j] * second[k * totalwidth + i]; | |
77 | } | |
78 | out[j * totalwidth + i] = value; | |
79 | } | |
80 | } | |
81 | } | |
82 | ||
83 | template<class T0, class T1, class Out> | |
84 | void tiled_multiply_carray_outer(T0* first, | |
85 | T1* second, | |
86 | Out* out, | |
87 | int width2, | |
88 | int height1, | |
89 | int common) { | |
90 | std::fill_n(out, width2 * height1, Out()); | |
91 | int j = 0; | |
92 | for(; j < height1 - tile_block_size; j += tile_block_size) { | |
93 | int i = 0; | |
94 | for(; i < width2 - tile_block_size; i += tile_block_size) { | |
95 | int k = 0; | |
96 | for(; k < common - tile_block_size; k += tile_block_size) { | |
97 | tiled_multiply_carray_inner( | |
98 | &first[k + width2 * j], | |
99 | &second[k * width2 + i], | |
100 | &out[j * width2 + i], | |
101 | width2, | |
102 | tile_block_size, | |
103 | tile_block_size, | |
104 | tile_block_size); | |
105 | } | |
106 | tiled_multiply_carray_inner( | |
107 | &first[k + width2 * j], | |
108 | &second[k * width2 + i], | |
109 | &out[j * width2 + i], | |
110 | width2, | |
111 | tile_block_size, | |
112 | tile_block_size, | |
113 | common - k); | |
114 | } | |
115 | int k = 0; | |
116 | for(; k < common - tile_block_size; k += tile_block_size) { | |
117 | tiled_multiply_carray_inner( | |
118 | &first[k + width2 * j], | |
119 | &second[k * width2 + i], | |
120 | &out[j * width2 + i], | |
121 | width2, width2 - i, | |
122 | tile_block_size, | |
123 | tile_block_size); | |
124 | } | |
125 | tiled_multiply_carray_inner( | |
126 | &first[k + width2 * j], | |
127 | &second[k * width2 + i], | |
128 | &out[j * width2 + i], | |
129 | width2, width2 - i, | |
130 | tile_block_size, | |
131 | common - k); | |
132 | } | |
133 | int i = 0; | |
134 | for(; i < width2 - tile_block_size; i += tile_block_size) { | |
135 | int k = 0; | |
136 | for(; k < common - tile_block_size; k += tile_block_size) { | |
137 | tiled_multiply_carray_inner( | |
138 | &first[k + width2 * j], | |
139 | &second[k * width2 + i], | |
140 | &out[j * width2 + i], | |
141 | width2, | |
142 | tile_block_size, | |
143 | height1 - j, | |
144 | tile_block_size); | |
145 | } | |
146 | tiled_multiply_carray_inner( | |
147 | &first[k + width2 * j], | |
148 | &second[k * width2 + i], | |
149 | &out[j * width2 + i], | |
150 | width2, | |
151 | tile_block_size, | |
152 | height1 - j, | |
153 | common - k); | |
154 | } | |
155 | int k = 0; | |
156 | for(; k < common - tile_block_size; k += tile_block_size) { | |
157 | tiled_multiply_carray_inner( | |
158 | &first[k + width2 * j], | |
159 | &second[k * width2 + i], | |
160 | &out[j * width2 + i], | |
161 | width2, | |
162 | width2 - i, | |
163 | height1 - j, | |
164 | tile_block_size); | |
165 | } | |
166 | tiled_multiply_carray_inner( | |
167 | &first[k + width2 * j], | |
168 | &second[k * width2 + i], | |
169 | &out[j * width2 + i], | |
170 | width2, | |
171 | width2 - i, | |
172 | height1 - j, | |
173 | common - k); | |
174 | } | |
175 | ||
176 | enum { max_value = 1000}; | |
177 | ||
178 | template<class F, class T, class N, class R> | |
11fdf7f2 | 179 | BOOST_CXX14_CONSTEXPR |
7c673cae FG |
180 | R solve_differential_equation(F f, T lower, T upper, N steps, R start) { |
181 | typedef typename F::template result<T, R>::type f_result; | |
182 | T h = (upper - lower) / (1.0*steps); | |
183 | for(N i = N(); i < steps; ++i) { | |
184 | R y = start; | |
185 | T x = lower + h * (1.0*i); | |
186 | f_result k1 = f(x, y); | |
187 | f_result k2 = f(x + h / 2.0, y + h * k1 / 2.0); | |
188 | f_result k3 = f(x + h / 2.0, y + h * k2 / 2.0); | |
189 | f_result k4 = f(x + h, y + h * k3); | |
190 | start = y + h * (k1 + 2.0 * k2 + 2.0 * k3 + k4) / 6.0; | |
191 | } | |
192 | return(start); | |
193 | } | |
194 | ||
195 | using namespace boost::units; | |
196 | ||
197 | //y' = 1 - x + 4 * y | |
198 | struct f { | |
199 | template<class Arg1, class Arg2> struct result; | |
200 | ||
11fdf7f2 | 201 | BOOST_CONSTEXPR double operator()(const double& x, const double& y) const { |
7c673cae FG |
202 | return(1.0 - x + 4.0 * y); |
203 | } | |
204 | ||
205 | boost::units::quantity<boost::units::si::velocity> | |
11fdf7f2 TL |
206 | BOOST_CONSTEXPR operator()(const quantity<si::time>& x, |
207 | const quantity<si::length>& y) const { | |
7c673cae FG |
208 | using namespace boost::units; |
209 | using namespace si; | |
210 | return(1.0 * meters / second - | |
211 | x * meters / pow<2>(seconds) + | |
212 | 4.0 * y / seconds ); | |
213 | } | |
214 | }; | |
215 | ||
216 | template<> | |
217 | struct f::result<double,double> { | |
218 | typedef double type; | |
219 | }; | |
220 | ||
221 | template<> | |
222 | struct f::result<quantity<si::time>, quantity<si::length> > { | |
223 | typedef quantity<si::velocity> type; | |
224 | }; | |
225 | ||
226 | ||
227 | ||
228 | //y' = 1 - x + 4 * y | |
229 | //y' - 4 * y = 1 - x | |
230 | //e^(-4 * x) * (dy - 4 * y * dx) = e^(-4 * x) * (1 - x) * dx | |
231 | //d/dx(y * e ^ (-4 * x)) = e ^ (-4 * x) (1 - x) * dx | |
232 | ||
233 | //d/dx(y * e ^ (-4 * x)) = e ^ (-4 * x) * dx - x * e ^ (-4 * x) * dx | |
234 | //d/dx(y * e ^ (-4 * x)) = d/dx((-3/16 + 1/4 * x) * e ^ (-4 * x)) | |
235 | //y * e ^ (-4 * x) = (-3/16 + 1/4 * x) * e ^ (-4 * x) + C | |
236 | //y = (-3/16 + 1/4 * x) + C/e ^ (-4 * x) | |
237 | //y = 1/4 * x - 3/16 + C * e ^ (4 * x) | |
238 | ||
239 | //y(0) = 1 | |
240 | //1 = - 3/16 + C | |
241 | //C = 19/16 | |
242 | //y(x) = 1/4 * x - 3/16 + 19/16 * e ^ (4 * x) | |
243 | ||
244 | ||
245 | ||
246 | int main() { | |
247 | boost::numeric::ublas::matrix<double> ublas_result; | |
248 | { | |
249 | boost::numeric::ublas::matrix<double> m1(max_value, max_value); | |
250 | boost::numeric::ublas::matrix<double> m2(max_value, max_value); | |
251 | std::srand(1492); | |
252 | for(int i = 0; i < max_value; ++i) { | |
253 | for(int j = 0; j < max_value; ++j) { | |
254 | m1(i,j) = std::rand(); | |
255 | m2(i,j) = std::rand(); | |
256 | } | |
257 | } | |
258 | std::cout << "multiplying ublas::matrix<double>(" | |
259 | << max_value << ", " << max_value << ") : "; | |
260 | boost::timer timer; | |
261 | ublas_result = (prod(m1, m2)); | |
262 | std::cout << timer.elapsed() << " seconds" << std::endl; | |
263 | } | |
264 | typedef boost::numeric::ublas::matrix< | |
265 | boost::units::quantity<boost::units::si::dimensionless> | |
266 | > matrix_type; | |
267 | matrix_type ublas_resultq; | |
268 | { | |
269 | matrix_type m1(max_value, max_value); | |
270 | matrix_type m2(max_value, max_value); | |
271 | std::srand(1492); | |
272 | for(int i = 0; i < max_value; ++i) { | |
273 | for(int j = 0; j < max_value; ++j) { | |
274 | m1(i,j) = std::rand(); | |
275 | m2(i,j) = std::rand(); | |
276 | } | |
277 | } | |
278 | std::cout << "multiplying ublas::matrix<quantity>(" | |
279 | << max_value << ", " << max_value << ") : "; | |
280 | boost::timer timer; | |
281 | ublas_resultq = (prod(m1, m2)); | |
282 | std::cout << timer.elapsed() << " seconds" << std::endl; | |
283 | } | |
284 | std::vector<double> cresult(max_value * max_value); | |
285 | { | |
286 | std::vector<double> m1(max_value * max_value); | |
287 | std::vector<double> m2(max_value * max_value); | |
288 | std::srand(1492); | |
289 | for(int i = 0; i < max_value * max_value; ++i) { | |
290 | m1[i] = std::rand(); | |
291 | m2[i] = std::rand(); | |
292 | } | |
293 | std::cout << "tiled_matrix_multiply<double>(" | |
294 | << max_value << ", " << max_value << ") : "; | |
295 | boost::timer timer; | |
296 | tiled_multiply_carray_outer( | |
297 | &m1[0], | |
298 | &m2[0], | |
299 | &cresult[0], | |
300 | max_value, | |
301 | max_value, | |
302 | max_value); | |
303 | std::cout << timer.elapsed() << " seconds" << std::endl; | |
304 | } | |
305 | std::vector< | |
306 | boost::units::quantity<boost::units::si::energy> | |
307 | > cresultq(max_value * max_value); | |
308 | { | |
309 | std::vector< | |
310 | boost::units::quantity<boost::units::si::force> | |
311 | > m1(max_value * max_value); | |
312 | std::vector< | |
313 | boost::units::quantity<boost::units::si::length> | |
314 | > m2(max_value * max_value); | |
315 | std::srand(1492); | |
316 | for(int i = 0; i < max_value * max_value; ++i) { | |
317 | m1[i] = std::rand() * boost::units::si::newtons; | |
318 | m2[i] = std::rand() * boost::units::si::meters; | |
319 | } | |
320 | std::cout << "tiled_matrix_multiply<quantity>(" | |
321 | << max_value << ", " << max_value << ") : "; | |
322 | boost::timer timer; | |
323 | tiled_multiply_carray_outer( | |
324 | &m1[0], | |
325 | &m2[0], | |
326 | &cresultq[0], | |
327 | max_value, | |
328 | max_value, | |
329 | max_value); | |
330 | std::cout << timer.elapsed() << " seconds" << std::endl; | |
331 | } | |
332 | for(int i = 0; i < max_value; ++i) { | |
333 | for(int j = 0; j < max_value; ++j) { | |
11fdf7f2 | 334 | const double diff = |
7c673cae FG |
335 | std::abs(ublas_result(i,j) - cresult[i * max_value + j]); |
336 | if(diff > ublas_result(i,j) /1e14) { | |
337 | std::cout << std::setprecision(15) << "Uh Oh. ublas_result(" | |
338 | << i << "," << j << ") = " << ublas_result(i,j) | |
339 | << std::endl | |
340 | << "cresult[" << i << " * " << max_value << " + " | |
341 | << j << "] = " << cresult[i * max_value + j] | |
342 | << std::endl; | |
343 | return(EXIT_FAILURE); | |
344 | } | |
345 | } | |
346 | } | |
347 | { | |
348 | std::vector<double> values(1000); | |
349 | std::cout << "solving y' = 1 - x + 4 * y with double: "; | |
350 | boost::timer timer; | |
351 | for(int i = 0; i < 1000; ++i) { | |
11fdf7f2 | 352 | const double x = .1 * i; |
7c673cae FG |
353 | values[i] = solve_differential_equation(f(), 0.0, x, i * 100, 1.0); |
354 | } | |
355 | std::cout << timer.elapsed() << " seconds" << std::endl; | |
356 | for(int i = 0; i < 1000; ++i) { | |
11fdf7f2 TL |
357 | const double x = .1 * i; |
358 | const double value = 1.0/4.0 * x - 3.0/16.0 + 19.0/16.0 * std::exp(4 * x); | |
7c673cae FG |
359 | if(std::abs(values[i] - value) > value / 1e9) { |
360 | std::cout << std::setprecision(15) << "i = : " << i | |
361 | << ", value = " << value << " approx = " << values[i] | |
362 | << std::endl; | |
363 | return(EXIT_FAILURE); | |
364 | } | |
365 | } | |
366 | } | |
367 | { | |
368 | using namespace boost::units; | |
369 | using namespace si; | |
370 | std::vector<quantity<length> > values(1000); | |
371 | std::cout << "solving y' = 1 - x + 4 * y with quantity: "; | |
372 | boost::timer timer; | |
373 | for(int i = 0; i < 1000; ++i) { | |
11fdf7f2 | 374 | const quantity<si::time> x = .1 * i * seconds; |
7c673cae FG |
375 | values[i] = solve_differential_equation( |
376 | f(), | |
377 | 0.0 * seconds, | |
378 | x, | |
379 | i * 100, | |
380 | 1.0 * meters); | |
381 | } | |
382 | std::cout << timer.elapsed() << " seconds" << std::endl; | |
383 | for(int i = 0; i < 1000; ++i) { | |
11fdf7f2 TL |
384 | const double x = .1 * i; |
385 | const quantity<si::length> value = | |
7c673cae FG |
386 | (1.0/4.0 * x - 3.0/16.0 + 19.0/16.0 * std::exp(4 * x)) * meters; |
387 | if(abs(values[i] - value) > value / 1e9) { | |
388 | std::cout << std::setprecision(15) << "i = : " << i | |
389 | << ", value = " << value << " approx = " | |
390 | << values[i] << std::endl; | |
391 | return(EXIT_FAILURE); | |
392 | } | |
393 | } | |
394 | } | |
395 | } |