1 // Boost.Units - A C++ library for zero-overhead dimensional analysis and
2 // unit/quantity manipulation and conversion
4 // Copyright (C) 2003-2008 Matthias Christian Schabel
5 // Copyright (C) 2008 Steven Watanabe
7 // Distributed under the Boost Software License, Version 1.0. (See
8 // accompanying file LICENSE_1_0.txt or copy at
9 // http://www.boost.org/LICENSE_1_0.txt)
14 \brief performance.cpp
17 Test runtime performance.
22 multiplying ublas::matrix<double>(1000, 1000) : 25.03 seconds
23 multiplying ublas::matrix<quantity>(1000, 1000) : 24.49 seconds
24 tiled_matrix_multiply<double>(1000, 1000) : 1.12 seconds
25 tiled_matrix_multiply<quantity>(1000, 1000) : 1.16 seconds
26 solving y' = 1 - x + 4 * y with double: 1.97 seconds
27 solving y' = 1 - x + 4 * y with quantity: 1.84 seconds
32 #define _SCL_SECURE_NO_WARNINGS
40 #include <boost/config.hpp>
41 #include <boost/timer.hpp>
42 #include <boost/utility/result_of.hpp>
46 #pragma warning(disable:4267; disable:4127; disable:4244; disable:4100)
49 #include <boost/numeric/ublas/matrix.hpp>
55 #include <boost/units/quantity.hpp>
56 #include <boost/units/systems/si.hpp>
57 #include <boost/units/cmath.hpp>
58 #include <boost/units/io.hpp>
64 template<class T0
, class T1
, class Out
>
65 void tiled_multiply_carray_inner(T0
* first
,
72 for(int j
= 0; j
< height1
; ++j
) {
73 for(int i
= 0; i
< width2
; ++i
) {
74 Out value
= out
[j
* totalwidth
+ i
];
75 for(int k
= 0; k
< common
; ++k
) {
76 value
+= first
[k
+ totalwidth
* j
] * second
[k
* totalwidth
+ i
];
78 out
[j
* totalwidth
+ i
] = value
;
83 template<class T0
, class T1
, class Out
>
84 void tiled_multiply_carray_outer(T0
* first
,
90 std::fill_n(out
, width2
* height1
, Out());
92 for(; j
< height1
- tile_block_size
; j
+= tile_block_size
) {
94 for(; i
< width2
- tile_block_size
; i
+= tile_block_size
) {
96 for(; k
< common
- tile_block_size
; k
+= tile_block_size
) {
97 tiled_multiply_carray_inner(
98 &first
[k
+ width2
* j
],
99 &second
[k
* width2
+ i
],
100 &out
[j
* width2
+ i
],
106 tiled_multiply_carray_inner(
107 &first
[k
+ width2
* j
],
108 &second
[k
* width2
+ i
],
109 &out
[j
* width2
+ i
],
116 for(; k
< common
- tile_block_size
; k
+= tile_block_size
) {
117 tiled_multiply_carray_inner(
118 &first
[k
+ width2
* j
],
119 &second
[k
* width2
+ i
],
120 &out
[j
* width2
+ i
],
125 tiled_multiply_carray_inner(
126 &first
[k
+ width2
* j
],
127 &second
[k
* width2
+ i
],
128 &out
[j
* width2
+ i
],
134 for(; i
< width2
- tile_block_size
; i
+= tile_block_size
) {
136 for(; k
< common
- tile_block_size
; k
+= tile_block_size
) {
137 tiled_multiply_carray_inner(
138 &first
[k
+ width2
* j
],
139 &second
[k
* width2
+ i
],
140 &out
[j
* width2
+ i
],
146 tiled_multiply_carray_inner(
147 &first
[k
+ width2
* j
],
148 &second
[k
* width2
+ i
],
149 &out
[j
* width2
+ i
],
156 for(; k
< common
- tile_block_size
; k
+= tile_block_size
) {
157 tiled_multiply_carray_inner(
158 &first
[k
+ width2
* j
],
159 &second
[k
* width2
+ i
],
160 &out
[j
* width2
+ i
],
166 tiled_multiply_carray_inner(
167 &first
[k
+ width2
* j
],
168 &second
[k
* width2
+ i
],
169 &out
[j
* width2
+ i
],
176 enum { max_value
= 1000};
178 template<class F
, class T
, class N
, class R
>
179 R
solve_differential_equation(F f
, T lower
, T upper
, N steps
, R start
) {
180 typedef typename
F::template result
<T
, R
>::type f_result
;
181 T h
= (upper
- lower
) / (1.0*steps
);
182 for(N i
= N(); i
< steps
; ++i
) {
184 T x
= lower
+ h
* (1.0*i
);
185 f_result k1
= f(x
, y
);
186 f_result k2
= f(x
+ h
/ 2.0, y
+ h
* k1
/ 2.0);
187 f_result k3
= f(x
+ h
/ 2.0, y
+ h
* k2
/ 2.0);
188 f_result k4
= f(x
+ h
, y
+ h
* k3
);
189 start
= y
+ h
* (k1
+ 2.0 * k2
+ 2.0 * k3
+ k4
) / 6.0;
194 using namespace boost::units
;
198 template<class Arg1
, class Arg2
> struct result
;
200 double operator()(const double& x
, const double& y
) const {
201 return(1.0 - x
+ 4.0 * y
);
204 boost::units::quantity
<boost::units::si::velocity
>
205 operator()(const quantity
<si::time
>& x
,
206 const quantity
<si::length
>& y
) const {
207 using namespace boost::units
;
209 return(1.0 * meters
/ second
-
210 x
* meters
/ pow
<2>(seconds
) +
216 struct f::result
<double,double> {
221 struct f::result
<quantity
<si::time
>, quantity
<si::length
> > {
222 typedef quantity
<si::velocity
> type
;
229 //e^(-4 * x) * (dy - 4 * y * dx) = e^(-4 * x) * (1 - x) * dx
230 //d/dx(y * e ^ (-4 * x)) = e ^ (-4 * x) (1 - x) * dx
232 //d/dx(y * e ^ (-4 * x)) = e ^ (-4 * x) * dx - x * e ^ (-4 * x) * dx
233 //d/dx(y * e ^ (-4 * x)) = d/dx((-3/16 + 1/4 * x) * e ^ (-4 * x))
234 //y * e ^ (-4 * x) = (-3/16 + 1/4 * x) * e ^ (-4 * x) + C
235 //y = (-3/16 + 1/4 * x) + C/e ^ (-4 * x)
236 //y = 1/4 * x - 3/16 + C * e ^ (4 * x)
241 //y(x) = 1/4 * x - 3/16 + 19/16 * e ^ (4 * x)
246 boost::numeric::ublas::matrix
<double> ublas_result
;
248 boost::numeric::ublas::matrix
<double> m1(max_value
, max_value
);
249 boost::numeric::ublas::matrix
<double> m2(max_value
, max_value
);
251 for(int i
= 0; i
< max_value
; ++i
) {
252 for(int j
= 0; j
< max_value
; ++j
) {
253 m1(i
,j
) = std::rand();
254 m2(i
,j
) = std::rand();
257 std::cout
<< "multiplying ublas::matrix<double>("
258 << max_value
<< ", " << max_value
<< ") : ";
260 ublas_result
= (prod(m1
, m2
));
261 std::cout
<< timer
.elapsed() << " seconds" << std::endl
;
263 typedef boost::numeric::ublas::matrix
<
264 boost::units::quantity
<boost::units::si::dimensionless
>
266 matrix_type ublas_resultq
;
268 matrix_type
m1(max_value
, max_value
);
269 matrix_type
m2(max_value
, max_value
);
271 for(int i
= 0; i
< max_value
; ++i
) {
272 for(int j
= 0; j
< max_value
; ++j
) {
273 m1(i
,j
) = std::rand();
274 m2(i
,j
) = std::rand();
277 std::cout
<< "multiplying ublas::matrix<quantity>("
278 << max_value
<< ", " << max_value
<< ") : ";
280 ublas_resultq
= (prod(m1
, m2
));
281 std::cout
<< timer
.elapsed() << " seconds" << std::endl
;
283 std::vector
<double> cresult(max_value
* max_value
);
285 std::vector
<double> m1(max_value
* max_value
);
286 std::vector
<double> m2(max_value
* max_value
);
288 for(int i
= 0; i
< max_value
* max_value
; ++i
) {
292 std::cout
<< "tiled_matrix_multiply<double>("
293 << max_value
<< ", " << max_value
<< ") : ";
295 tiled_multiply_carray_outer(
302 std::cout
<< timer
.elapsed() << " seconds" << std::endl
;
305 boost::units::quantity
<boost::units::si::energy
>
306 > cresultq(max_value
* max_value
);
309 boost::units::quantity
<boost::units::si::force
>
310 > m1(max_value
* max_value
);
312 boost::units::quantity
<boost::units::si::length
>
313 > m2(max_value
* max_value
);
315 for(int i
= 0; i
< max_value
* max_value
; ++i
) {
316 m1
[i
] = std::rand() * boost::units::si::newtons
;
317 m2
[i
] = std::rand() * boost::units::si::meters
;
319 std::cout
<< "tiled_matrix_multiply<quantity>("
320 << max_value
<< ", " << max_value
<< ") : ";
322 tiled_multiply_carray_outer(
329 std::cout
<< timer
.elapsed() << " seconds" << std::endl
;
331 for(int i
= 0; i
< max_value
; ++i
) {
332 for(int j
= 0; j
< max_value
; ++j
) {
334 std::abs(ublas_result(i
,j
) - cresult
[i
* max_value
+ j
]);
335 if(diff
> ublas_result(i
,j
) /1e14
) {
336 std::cout
<< std::setprecision(15) << "Uh Oh. ublas_result("
337 << i
<< "," << j
<< ") = " << ublas_result(i
,j
)
339 << "cresult[" << i
<< " * " << max_value
<< " + "
340 << j
<< "] = " << cresult
[i
* max_value
+ j
]
342 return(EXIT_FAILURE
);
347 std::vector
<double> values(1000);
348 std::cout
<< "solving y' = 1 - x + 4 * y with double: ";
350 for(int i
= 0; i
< 1000; ++i
) {
352 values
[i
] = solve_differential_equation(f(), 0.0, x
, i
* 100, 1.0);
354 std::cout
<< timer
.elapsed() << " seconds" << std::endl
;
355 for(int i
= 0; i
< 1000; ++i
) {
357 double value
= 1.0/4.0 * x
- 3.0/16.0 + 19.0/16.0 * std::exp(4 * x
);
358 if(std::abs(values
[i
] - value
) > value
/ 1e9
) {
359 std::cout
<< std::setprecision(15) << "i = : " << i
360 << ", value = " << value
<< " approx = " << values
[i
]
362 return(EXIT_FAILURE
);
367 using namespace boost::units
;
369 std::vector
<quantity
<length
> > values(1000);
370 std::cout
<< "solving y' = 1 - x + 4 * y with quantity: ";
372 for(int i
= 0; i
< 1000; ++i
) {
373 quantity
<si::time
> x
= .1 * i
* seconds
;
374 values
[i
] = solve_differential_equation(
381 std::cout
<< timer
.elapsed() << " seconds" << std::endl
;
382 for(int i
= 0; i
< 1000; ++i
) {
384 quantity
<si::length
> value
=
385 (1.0/4.0 * x
- 3.0/16.0 + 19.0/16.0 * std::exp(4 * x
)) * meters
;
386 if(abs(values
[i
] - value
) > value
/ 1e9
) {
387 std::cout
<< std::setprecision(15) << "i = : " << i
388 << ", value = " << value
<< " approx = "
389 << values
[i
] << std::endl
;
390 return(EXIT_FAILURE
);