1 ///////////////////////////////////////////////////////////////////////////////
2 // Copyright Christopher Kormanyos 2020 - 2021.
3 // Distributed under the Boost Software License, Version 1.0.
4 // (See accompanying file LICENSE_1_0.txt or copy at
5 // http://www.boost.org/LICENSE_1_0.txt)
8 // This example exercises Boost.Multiprecision in concurrent
9 // multi-threaded environments. To do so a loop involving
10 // non-trivial calculations of numerous function values
11 // has been set up within both concurrent as well as
12 // sequential running environments. In particular,
13 // this example uses an AGM method to do a "from the ground up"
14 // calculation of logarithms. The logarithm functions values
15 // are compared with the values from Boost.Multiprecision's
16 // specific log functions for the relevant backends.
17 // The log GM here is not optimized or intended for
18 // high-performance work, but can be taken as an
19 // interesting example of an AGM iteration if helpful.
21 // This example has been initially motivated in part
23 // https://github.com/boostorg/multiprecision/pull/211
25 // We find the following performance data here:
26 // https://github.com/boostorg/multiprecision/pull/213
29 // result_is_ok_concurrent: true, calculation_time_concurrent: 18.1s
30 // result_is_ok_sequential: true, calculation_time_sequential: 48.5s
33 // result_is_ok_concurrent: true, calculation_time_concurrent: 18.7s
34 // result_is_ok_sequential: true, calculation_time_sequential: 50.4s
37 // result_is_ok_concurrent: true, calculation_time_concurrent: 3.3s
38 // result_is_ok_sequential: true, calculation_time_sequential: 12.4s
41 // result_is_ok_concurrent: true, calculation_time_concurrent: 0.6s
42 // result_is_ok_sequential: true, calculation_time_sequential: 1.9s
#include <algorithm>
#include <atomic>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <limits>
#include <thread>
#include <vector>

#include <boost/math/constants/constants.hpp>
#include <boost/math/special_functions/prime.hpp>
57 #define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT 101
58 #define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT 102
59 #define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT 103
60 #define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT 104
62 #if !defined(BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE)
63 #define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT
64 //#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT
65 //#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT
66 //#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT
69 #if (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT)
70 #include <boost/multiprecision/cpp_dec_float.hpp>
72 using big_float_type
= boost::multiprecision::number
<boost::multiprecision::cpp_dec_float
<501>,
73 boost::multiprecision::et_off
>;
75 #elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT)
76 #include <boost/multiprecision/cpp_bin_float.hpp>
78 using big_float_type
= boost::multiprecision::number
<boost::multiprecision::cpp_bin_float
<501>,
79 boost::multiprecision::et_off
>;
81 #elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT)
82 #include <boost/multiprecision/gmp.hpp>
84 using big_float_type
= boost::multiprecision::number
<boost::multiprecision::gmp_float
<501>,
85 boost::multiprecision::et_off
>;
87 #elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT)
88 #include <boost/multiprecision/mpfr.hpp>
90 using big_float_type
= boost::multiprecision::number
<boost::multiprecision::mpfr_float_backend
<501>,
91 boost::multiprecision::et_off
>;
94 #error BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE is undefined.
97 namespace boost
{ namespace multiprecision
{ namespace exercise_threading
{
namespace my_concurrency
{
  // Minimal "parallel for" helper: partition the half-open index range
  // [start, end) into roughly equal contiguous slices and process each
  // slice on its own std::thread, invoking parallel_function(i) once for
  // every index i. All threads are joined before this function returns.
  template<typename index_type,
           typename callable_function_type>
  void parallel_for(index_type             start,
                    index_type             end,
                    callable_function_type parallel_function)
  {
    // Estimate the number of threads available.
    static const unsigned int number_of_threads_hint = std::thread::hardware_concurrency();

    // Fall back to 4 threads when the hardware concurrency is unknown (0).
    static const unsigned int number_of_threads_total =
      ((number_of_threads_hint == 0U) ? 4U : number_of_threads_hint);

    // Use only 7/8 of the available cores.
    static const unsigned int number_of_threads = number_of_threads_total - (number_of_threads_total / 8U);

    std::cout << "Executing with " << number_of_threads << " threads" << std::endl;

    // Set the size of a slice for the range functions.
    index_type n = index_type(end - start) + index_type(1);

    index_type slice =
      static_cast<index_type>(std::round(n / static_cast<float>(number_of_threads)));

    slice = (std::max)(slice, index_type(1));

    // Inner loop: process the half-open sub-range [index_lo, index_hi).
    auto launch_range =
      [&parallel_function](index_type index_lo, index_type index_hi)
      {
        for(index_type i = index_lo; i < index_hi; ++i)
        {
          parallel_function(i);
        }
      };

    // Create the thread pool and launch the jobs.
    std::vector<std::thread> pool;

    pool.reserve(number_of_threads);

    index_type i1 = start;
    index_type i2 = (std::min)(index_type(start + slice), end);

    for(index_type i = 0U; ((index_type(i + index_type(1U)) < number_of_threads) && (i1 < end)); ++i)
    {
      pool.emplace_back(launch_range, i1, i2);

      // Advance to the next slice.
      i1 = i2;

      i2 = (std::min)(index_type(i2 + slice), end);
    }

    // Launch the final job covering the (possibly empty) remainder [i1, end).
    pool.emplace_back(launch_range, i1, end);

    // Wait for the jobs to finish.
    for(std::thread& thread_in_pool : pool)
    {
      if(thread_in_pool.joinable())
      {
        thread_in_pool.join();
      }
    }
  }
} // namespace my_concurrency
// Calculate (b ^ p) for a non-negative integral power p using binary
// exponentiation (square-and-multiply). Powers 0, 1 and 2 are handled
// directly; larger powers consume one bit of p per loop iteration.
template<typename FloatingPointType,
         typename UnsignedIntegralType>
FloatingPointType pown(const FloatingPointType& b, const UnsignedIntegralType& p)
{
  using local_floating_point_type    = FloatingPointType;
  using local_unsigned_integral_type = UnsignedIntegralType;

  local_floating_point_type result;

  if     (p == local_unsigned_integral_type(0U)) { result = local_floating_point_type(1U); }
  else if(p == local_unsigned_integral_type(1U)) { result = b; }
  else if(p == local_unsigned_integral_type(2U)) { result = b; result *= b; }
  else
  {
    result = local_floating_point_type(1U);

    local_floating_point_type y(b);

    // Square-and-multiply: multiply in y whenever the current low bit
    // of the (shrinking) exponent is set, then square y for the next bit.
    for(local_unsigned_integral_type p_local(p); p_local != local_unsigned_integral_type(0U); p_local >>= 1U)
    {
      if((static_cast<unsigned>(p_local) & 1U) != 0U)
      {
        result *= y;
      }

      y *= y;
    }
  }

  return result;
}
205 const std::vector
<std::uint32_t>& primes()
207 static std::vector
<std::uint32_t> my_primes
;
209 if(my_primes
.empty())
211 my_primes
.resize(10000U);
213 // Get exactly 10,000 primes.
214 for(std::size_t i
= 0U; i
< my_primes
.size(); ++i
)
216 my_primes
[i
] = boost::math::prime((unsigned int) i
);
223 } // namespace detail
225 template<typename FloatingPointType
>
226 FloatingPointType
log(const FloatingPointType
& x
)
228 // Use an AGM method to compute the logarithm of x.
230 // For values less than 1 invert the argument and
231 // remember (in this case) to negate the result below.
232 const bool b_negate
= (x
< 1);
234 const FloatingPointType xx
= ((b_negate
== false) ? x
: 1 / x
);
237 // Set b0 = 4 / (x * 2^m) = 1 / (x * 2^(m - 2))
239 FloatingPointType
ak(1U);
241 const float n_times_factor
= static_cast<float>(static_cast<float>(std::numeric_limits
<FloatingPointType
>::digits10
) * 1.67F
);
242 const float lgx_over_lg2
= std::log(static_cast<float>(xx
)) / std::log(2.0F
);
244 std::int32_t m
= static_cast<std::int32_t>(n_times_factor
- lgx_over_lg2
);
246 // Ensure that the resulting power is non-negative.
247 // Also enforce that m >= 8.
248 m
= (std::max
)(m
, static_cast<std::int32_t>(8));
250 FloatingPointType bk
= detail::pown(FloatingPointType(2), static_cast<std::uint32_t>(m
));
255 FloatingPointType
ak_tmp(0U);
259 // Determine the requested precision of the upcoming iteration in units of digits10.
260 const FloatingPointType target_tolerance
= sqrt(std::numeric_limits
<FloatingPointType
>::epsilon()) / 100;
262 for(std::int32_t k
= static_cast<std::int32_t>(0); k
< static_cast<std::int32_t>(64); ++k
)
266 // Check for the number of significant digits to be
267 // at least half of the requested digits. If at least
268 // half of the requested digits have been achieved,
269 // then break after the upcoming iteration.
270 const bool break_after_this_iteration
= ( (k
> static_cast<std::int32_t>(4))
271 && (fabs(1 - fabs(ak
/ bk
)) < target_tolerance
));
277 if(break_after_this_iteration
)
286 // We are now finished with the AGM iteration for log(x).
288 // Compute log(x) = {pi / [2 * AGM(1, 4 / 2^m)]} - (m * ln2)
289 // Note at this time that (ak = bk) = AGM(...)
291 // Retrieve the value of pi, divide by (2 * a) and subtract (m * ln2).
292 const FloatingPointType result
=
293 boost::math::constants::pi
<FloatingPointType
>() / (ak
* 2)
294 - (boost::math::constants::ln_two
<FloatingPointType
>() * m
);
296 return ((b_negate
== true) ? -result
: result
);
299 } } } // namespace boost::multiprecision::exercise_threading
301 template<typename FloatingPointType
>
302 bool log_agm_concurrent(float& calculation_time
)
304 const std::size_t count
= boost::multiprecision::exercise_threading::detail::primes().size();
306 std::vector
<FloatingPointType
> log_results(count
);
307 std::vector
<FloatingPointType
> log_control(count
);
309 std::atomic_flag log_agm_lock
= ATOMIC_FLAG_INIT
;
311 std::size_t concurrent_log_agm_count
= 0U;
313 const std::clock_t start
= std::clock();
315 boost::multiprecision::exercise_threading::detail::my_concurrency::parallel_for
319 [&log_results
, &log_control
, &concurrent_log_agm_count
, &log_agm_lock
](std::size_t i
)
321 while(log_agm_lock
.test_and_set()) { ; }
322 const FloatingPointType dx
= (FloatingPointType(1U) / (boost::multiprecision::exercise_threading::detail::primes()[i
]));
323 log_agm_lock
.clear();
325 const FloatingPointType x
= boost::math::constants::catalan
<FloatingPointType
>() + dx
;
327 const FloatingPointType lr
= boost::multiprecision::exercise_threading::log(x
);
328 const FloatingPointType lc
= boost::multiprecision::log(x
);
330 while(log_agm_lock
.test_and_set()) { ; }
335 ++concurrent_log_agm_count
;
337 if((concurrent_log_agm_count
% 100U) == 0U)
339 std::cout
<< "log agm concurrent at index "
340 << concurrent_log_agm_count
342 << log_results
.size()
343 << ". Total processed so far: "
345 << std::setprecision(1)
346 << (100.0F
* float(concurrent_log_agm_count
)) / float(log_results
.size())
351 log_agm_lock
.clear();
355 calculation_time
= static_cast<float>(std::clock() - start
) / static_cast<float>(CLOCKS_PER_SEC
);
357 std::cout
<< std::endl
;
359 std::cout
<< "Checking results concurrent: ";
361 bool result_is_ok
= true;
363 const FloatingPointType tol
= std::numeric_limits
<FloatingPointType
>::epsilon() * 1000000U;
365 for(std::size_t i
= 0U; i
< log_results
.size(); ++i
)
369 const FloatingPointType close_fraction
= fabs(1 - (log_results
[i
] / log_control
[i
]));
371 result_is_ok
&= (close_fraction
< tol
);
374 std::cout
<< std::boolalpha
<< result_is_ok
<< std::endl
;
379 template<typename FloatingPointType
>
380 bool log_agm_sequential(float& calculation_time
)
382 const std::size_t count
= boost::multiprecision::exercise_threading::detail::primes().size();
384 std::vector
<FloatingPointType
> log_results(count
);
385 std::vector
<FloatingPointType
> log_control(count
);
387 const std::clock_t start
= std::clock();
389 for(std::size_t i
= 0U; i
< log_results
.size(); ++i
)
391 const std::size_t sequential_log_agm_count
= i
+ 1U;
393 const FloatingPointType dx
= (FloatingPointType(1U) / (boost::multiprecision::exercise_threading::detail::primes()[i
]));
394 const FloatingPointType x
= boost::math::constants::catalan
<FloatingPointType
>() + dx
;
396 log_results
[i
] = boost::multiprecision::exercise_threading::log(x
);
397 log_control
[i
] = boost::multiprecision::log(x
);
399 if((sequential_log_agm_count
% 100U) == 0U)
401 std::cout
<< "log agm sequential at index "
402 << sequential_log_agm_count
404 << log_results
.size()
405 << ". Total processed so far: "
407 << std::setprecision(1)
408 << (100.0F
* float(sequential_log_agm_count
)) / float(log_results
.size())
414 calculation_time
= static_cast<float>(std::clock() - start
) / static_cast<float>(CLOCKS_PER_SEC
);
416 std::cout
<< std::endl
;
418 std::cout
<< "Checking results sequential: ";
420 bool result_is_ok
= true;
422 const FloatingPointType tol
= std::numeric_limits
<FloatingPointType
>::epsilon() * 1000000U;
424 for(std::size_t i
= 0U; i
< log_results
.size(); ++i
)
428 const FloatingPointType close_fraction
= fabs(1 - (log_results
[i
] / log_control
[i
]));
430 result_is_ok
&= (close_fraction
< tol
);
433 std::cout
<< std::boolalpha
<< result_is_ok
<< std::endl
;
440 std::cout
<< "Calculating "
441 << boost::multiprecision::exercise_threading::detail::primes().size()
445 float calculation_time_concurrent
;
446 const bool result_is_ok_concurrent
= log_agm_concurrent
<big_float_type
>(calculation_time_concurrent
);
448 float calculation_time_sequential
;
449 const bool result_is_ok_sequential
= log_agm_sequential
<big_float_type
>(calculation_time_sequential
);
451 std::cout
<< std::endl
;
453 std::cout
<< "result_is_ok_concurrent: "
455 << result_is_ok_concurrent
456 << ", calculation_time_concurrent: "
458 << std::setprecision(1)
459 << calculation_time_concurrent
463 std::cout
<< "result_is_ok_sequential: "
465 << result_is_ok_sequential
466 << ", calculation_time_sequential: "
468 << std::setprecision(1)
469 << calculation_time_sequential