///////////////////////////////////////////////////////////////////////////////
//  Copyright Christopher Kormanyos 2020.
//  Distributed under the Boost Software License, Version 1.0.
//  (See accompanying file LICENSE_1_0.txt or copy at
//  http://www.boost.org/LICENSE_1_0.txt)
///////////////////////////////////////////////////////////////////////////////

// This example exercises Boost.Multiprecision in concurrent
// multi-threaded environments. To do so a loop involving
// non-trivial calculations of numerous function values
// has been set up within both concurrent as well as
// sequential running environments. In particular,
// this example uses an AGM method to do a "from the ground up"
// calculation of logarithms. The logarithm function values
// are compared with the values from Boost.Multiprecision's
// specific log functions for the relevant backends.
// The log AGM here is not optimized or intended for
// high-performance work, but can be taken as an
// interesting example of an AGM iteration if helpful.

// This example has been initially motivated in part
// by discussions in:
// https://github.com/boostorg/multiprecision/pull/211

// We find the following performance data here:
// https://github.com/boostorg/multiprecision/pull/213
//
// result_is_ok_concurrent: true, calculation_time_concurrent: 18.1s
// result_is_ok_sequential: true, calculation_time_sequential: 48.5s
//
// result_is_ok_concurrent: true, calculation_time_concurrent: 18.7s
// result_is_ok_sequential: true, calculation_time_sequential: 50.4s
//
// result_is_ok_concurrent: true, calculation_time_concurrent: 3.3s
// result_is_ok_sequential: true, calculation_time_sequential: 12.4s
//
// result_is_ok_concurrent: true, calculation_time_concurrent: 0.6s
// result_is_ok_sequential: true, calculation_time_sequential: 1.9s
#include <algorithm>
#include <atomic>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <limits>
#include <thread>
#include <vector>

#include <boost/math/constants/constants.hpp>
#include <boost/math/special_functions/prime.hpp>
// Enumerate the supported big-float backends for this exercise.
#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT 101
#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT     102
#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT 103
#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT    104

// Select cpp_dec_float as the default backend when no backend
// has been chosen explicitly on the compiler command line.
#if !defined(BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE)
#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT
//#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT
//#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT
//#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT
#endif
69 #if (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT)
70 #include <boost/multiprecision/cpp_dec_float.hpp>
72 using big_float_type
= boost::multiprecision::number
<boost::multiprecision::cpp_dec_float
<501>,
73 boost::multiprecision::et_off
>;
75 #elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT)
76 #include <boost/multiprecision/cpp_bin_float.hpp>
78 using big_float_type
= boost::multiprecision::number
<boost::multiprecision::cpp_bin_float
<501>,
79 boost::multiprecision::et_off
>;
81 #elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT)
82 #include <boost/multiprecision/gmp.hpp>
84 using big_float_type
= boost::multiprecision::number
<boost::multiprecision::gmp_float
<501>,
85 boost::multiprecision::et_off
>;
87 #elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT)
88 #include <boost/multiprecision/mpfr.hpp>
90 using big_float_type
= boost::multiprecision::number
<boost::multiprecision::mpfr_float_backend
<501>,
91 boost::multiprecision::et_off
>;
94 #error BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE is undefined.
97 namespace boost
{ namespace multiprecision
{ namespace exercise_threading
{
namespace my_concurrency {

// Apply parallel_function(i) to every index i in the half-open
// range [start, end), distributing contiguous slices of the range
// over a small pool of std::thread objects and joining them all
// before returning.
template<typename index_type,
         typename callable_function_type>
void parallel_for(index_type             start,
                  index_type             end,
                  callable_function_type parallel_function)
{
   // Estimate the number of threads available.
   static const unsigned int number_of_threads_hint = std::thread::hardware_concurrency();

   static const unsigned int number_of_threads_total =
      ((number_of_threads_hint == 0U) ? 4U : number_of_threads_hint);

   // Use 7/8 of the available cores (leave 1/8 for the rest of the system).
   static const unsigned int number_of_threads = number_of_threads_total - (number_of_threads_total / 8U);

   std::cout << "Executing with " << number_of_threads << " threads" << std::endl;

   // Set the size of a slice for the range functions.
   index_type n = index_type(end - start) + index_type(1);

   index_type slice =
      static_cast<index_type>(std::round(n / static_cast<float>(number_of_threads)));

   slice = (std::max)(slice, index_type(1));

   // Inner loop: process the half-open sub-range [index_lo, index_hi).
   auto launch_range =
      [&parallel_function](index_type index_lo, index_type index_hi)
      {
         for(index_type i = index_lo; i < index_hi; ++i)
         {
            parallel_function(i);
         }
      };

   // Create the thread pool and launch the jobs.
   std::vector<std::thread> pool;

   pool.reserve(number_of_threads);

   index_type i1 = start;
   index_type i2 = (std::min)(index_type(start + slice), end);

   for(index_type i = 0U; ((index_type(i + index_type(1U)) < number_of_threads) && (i1 < end)); ++i)
   {
      pool.emplace_back(launch_range, i1, i2);

      i1 = i2;

      i2 = (std::min)(index_type(i2 + slice), end);
   }

   // Launch the final slice (the remainder of the range, if any).
   if(i1 < end)
   {
      pool.emplace_back(launch_range, i1, end);
   }

   // Wait for the jobs to finish.
   for(std::thread& thread_in_pool : pool)
   {
      if(thread_in_pool.joinable())
      {
         thread_in_pool.join();
      }
   }
}
} // namespace my_concurrency
// Calculate (b ^ p) for unsigned integral p via binary exponentiation,
// with the small exponents 0, 1 and 2 handled as special cases.
template<typename FloatingPointType,
         typename UnsignedIntegralType>
FloatingPointType pown(const FloatingPointType& b, const UnsignedIntegralType& p)
{
   // Calculate (b ^ p).

   using local_floating_point_type    = FloatingPointType;
   using local_unsigned_integral_type = UnsignedIntegralType;

   local_floating_point_type result;

   if     (p == local_unsigned_integral_type(0U)) { result = local_floating_point_type(1U); }
   else if(p == local_unsigned_integral_type(1U)) { result = b; }
   else if(p == local_unsigned_integral_type(2U)) { result = b; result *= b; }
   else
   {
      result = local_floating_point_type(1U);

      local_floating_point_type y(b);

      // Square-and-multiply: consume the exponent one bit at a time.
      for(local_unsigned_integral_type p_local(p); p_local != local_unsigned_integral_type(0U); p_local >>= 1U)
      {
         if((static_cast<unsigned>(p_local) & 1U) != 0U)
         {
            result *= y;
         }

         y *= y;
      }
   }

   return result;
}
205 const std::vector
<std::uint32_t>& primes()
207 static const std::vector
<std::uint32_t> my_primes
=
208 []() -> std::vector
<std::uint32_t>
210 std::vector
<std::uint32_t> local_primes(10000U);
212 // Get exactly 10,000 primes.
213 for(auto i
= 0U; i
< local_primes
.size(); ++i
)
215 local_primes
[i
] = boost::math::prime(i
);
224 } // namespace detail
226 template<typename FloatingPointType
>
227 FloatingPointType
log(const FloatingPointType
& x
)
229 // Use an AGM method to compute the logarithm of x.
231 // For values less than 1 invert the argument and
232 // remember (in this case) to negate the result below.
233 const bool b_negate
= (x
< 1);
235 const FloatingPointType xx
= ((b_negate
== false) ? x
: 1 / x
);
238 // Set b0 = 4 / (x * 2^m) = 1 / (x * 2^(m - 2))
240 FloatingPointType
ak(1U);
242 const float n_times_factor
= static_cast<float>(static_cast<float>(std::numeric_limits
<FloatingPointType
>::digits10
) * 1.67F
);
243 const float lgx_over_lg2
= std::log(static_cast<float>(xx
)) / std::log(2.0F
);
245 std::int32_t m
= static_cast<std::int32_t>(n_times_factor
- lgx_over_lg2
);
247 // Ensure that the resulting power is non-negative.
248 // Also enforce that m >= 8.
249 m
= (std::max
)(m
, static_cast<std::int32_t>(8));
251 FloatingPointType bk
= detail::pown(FloatingPointType(2), static_cast<std::uint32_t>(m
));
256 FloatingPointType
ak_tmp(0U);
260 // Determine the requested precision of the upcoming iteration in units of digits10.
261 const FloatingPointType target_tolerance
= sqrt(std::numeric_limits
<FloatingPointType
>::epsilon()) / 100;
263 for(std::int32_t k
= static_cast<std::int32_t>(0); k
< static_cast<std::int32_t>(64); ++k
)
267 // Check for the number of significant digits to be
268 // at least half of the requested digits. If at least
269 // half of the requested digits have been achieved,
270 // then break after the upcoming iteration.
271 const bool break_after_this_iteration
= ( (k
> static_cast<std::int32_t>(4))
272 && (fabs(1 - fabs(ak
/ bk
)) < target_tolerance
));
278 if(break_after_this_iteration
)
287 // We are now finished with the AGM iteration for log(x).
289 // Compute log(x) = {pi / [2 * AGM(1, 4 / 2^m)]} - (m * ln2)
290 // Note at this time that (ak = bk) = AGM(...)
292 // Retrieve the value of pi, divide by (2 * a) and subtract (m * ln2).
293 const FloatingPointType result
=
294 boost::math::constants::pi
<FloatingPointType
>() / (ak
* 2)
295 - (boost::math::constants::ln_two
<FloatingPointType
>() * m
);
297 return ((b_negate
== true) ? -result
: result
);
300 } } } // namespace boost::multiprecision::exercise_threading
302 template<typename FloatingPointType
>
303 bool log_agm_concurrent(float& calculation_time
)
305 const std::size_t count
= boost::multiprecision::exercise_threading::detail::primes().size();
307 std::vector
<FloatingPointType
> log_results(count
);
308 std::vector
<FloatingPointType
> log_control(count
);
310 std::atomic_flag log_agm_lock
= ATOMIC_FLAG_INIT
;
312 std::size_t concurrent_log_agm_count
= 0U;
314 const std::clock_t start
= std::clock();
316 boost::multiprecision::exercise_threading::detail::my_concurrency::parallel_for
320 [&log_results
, &log_control
, &concurrent_log_agm_count
, &log_agm_lock
](std::size_t i
)
322 while(log_agm_lock
.test_and_set()) { ; }
323 ++concurrent_log_agm_count
;
324 if((concurrent_log_agm_count
% 100U) == 0U)
326 std::cout
<< "log agm concurrent at index "
327 << concurrent_log_agm_count
329 << log_results
.size()
330 << ". Total processed so far: "
332 << std::setprecision(1)
333 << (100.0F
* float(concurrent_log_agm_count
)) / float(log_results
.size())
337 log_agm_lock
.clear();
339 const FloatingPointType dx
= (FloatingPointType(1U) / (boost::multiprecision::exercise_threading::detail::primes()[i
]));
340 const FloatingPointType x
= boost::math::constants::catalan
<FloatingPointType
>() + dx
;
342 const FloatingPointType lr
= boost::multiprecision::exercise_threading::log(x
);
343 const FloatingPointType lc
= boost::multiprecision::log(x
);
350 calculation_time
= static_cast<float>(std::clock() - start
) / static_cast<float>(CLOCKS_PER_SEC
);
352 std::cout
<< std::endl
;
354 std::cout
<< "Checking results concurrent: ";
356 bool result_is_ok
= true;
358 for(std::size_t i
= 0U; i
< log_results
.size(); ++i
)
362 const FloatingPointType close_fraction
= fabs(1 - (log_results
[i
] / log_control
[i
]));
364 result_is_ok
&= (close_fraction
< std::numeric_limits
<FloatingPointType
>::epsilon() * 1000000U);
367 std::cout
<< std::boolalpha
<< result_is_ok
<< std::endl
;
372 template<typename FloatingPointType
>
373 bool log_agm_sequential(float& calculation_time
)
375 const std::size_t count
= boost::multiprecision::exercise_threading::detail::primes().size();
377 std::vector
<FloatingPointType
> log_results(count
);
378 std::vector
<FloatingPointType
> log_control(count
);
380 std::atomic_flag log_agm_lock
= ATOMIC_FLAG_INIT
;
382 const std::clock_t start
= std::clock();
384 for(std::size_t i
= 0U; i
< log_results
.size(); ++i
)
386 const std::size_t sequential_log_agm_count
= i
+ 1U;
388 if((sequential_log_agm_count
% 100U) == 0U)
390 std::cout
<< "log agm sequential at index "
391 << sequential_log_agm_count
393 << log_results
.size()
394 << ". Total processed so far: "
396 << std::setprecision(1)
397 << (100.0F
* float(sequential_log_agm_count
)) / float(log_results
.size())
402 const FloatingPointType dx
= (FloatingPointType(1U) / (boost::multiprecision::exercise_threading::detail::primes()[i
]));
403 const FloatingPointType x
= boost::math::constants::catalan
<FloatingPointType
>() + dx
;
405 log_results
[i
] = boost::multiprecision::exercise_threading::log(x
);
406 log_control
[i
] = boost::multiprecision::log(x
);
409 calculation_time
= static_cast<float>(std::clock() - start
) / static_cast<float>(CLOCKS_PER_SEC
);
411 std::cout
<< std::endl
;
413 std::cout
<< "Checking results sequential: ";
415 bool result_is_ok
= true;
417 for(std::size_t i
= 0U; i
< log_results
.size(); ++i
)
421 const FloatingPointType close_fraction
= fabs(1 - (log_results
[i
] / log_control
[i
]));
423 result_is_ok
&= (close_fraction
< std::numeric_limits
<FloatingPointType
>::epsilon() * 1000000U);
426 std::cout
<< std::boolalpha
<< result_is_ok
<< std::endl
;
433 std::cout
<< "Calculating "
434 << boost::multiprecision::exercise_threading::detail::primes().size()
438 float calculation_time_concurrent
;
439 const bool result_is_ok_concurrent
= log_agm_concurrent
<big_float_type
>(calculation_time_concurrent
);
441 float calculation_time_sequential
;
442 const bool result_is_ok_sequential
= log_agm_sequential
<big_float_type
>(calculation_time_sequential
);
444 std::cout
<< std::endl
;
446 std::cout
<< "result_is_ok_concurrent: "
448 << result_is_ok_concurrent
449 << ", calculation_time_concurrent: "
451 << std::setprecision(1)
452 << calculation_time_concurrent
456 std::cout
<< "result_is_ok_sequential: "
458 << result_is_ok_sequential
459 << ", calculation_time_sequential: "
461 << std::setprecision(1)
462 << calculation_time_sequential