// git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/multiprecision/example/exercise_threading_log_agm.cpp
// import quincy beta 17.1.0
// [ceph.git] / ceph / src / boost / libs / multiprecision / example / exercise_threading_log_agm.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // Copyright Christopher Kormanyos 2020.
3 // Distributed under the Boost Software License, Version 1.0.
4 // (See accompanying file LICENSE_1_0.txt or copy at
5 // http://www.boost.org/LICENSE_1_0.txt)
6 //
7
8 // This example exercises Boost.Multiprecision in concurrent
9 // multi-threaded environments. To do so a loop involving
10 // non-trivial calculations of numerous function values
11 // has been set up within both concurrent as well as
12 // sequential running environments. In particular,
13 // this example uses an AGM method to do a "from the ground up"
14 // calculation of logarithms. The logarithm functions values
15 // are compared with the values from Boost.Multiprecision's
16 // specific log functions for the relevant backends.
17 // The log GM here is not optimized or intended for
18 // high-performance work, but can be taken as an
19 // interesting example of an AGM iteration if helpful.
20
21 // This example has been initially motivated in part
22 // by discussions in:
23 // https://github.com/boostorg/multiprecision/pull/211
24
25 // We find the following performance data here:
26 // https://github.com/boostorg/multiprecision/pull/213
27 //
28 // cpp_dec_float:
29 // result_is_ok_concurrent: true, calculation_time_concurrent: 18.1s
30 // result_is_ok_sequential: true, calculation_time_sequential: 48.5s
31 //
32 // cpp_bin_float:
33 // result_is_ok_concurrent: true, calculation_time_concurrent: 18.7s
34 // result_is_ok_sequential: true, calculation_time_sequential: 50.4s
35 //
36 // gmp_float:
37 // result_is_ok_concurrent: true, calculation_time_concurrent: 3.3s
38 // result_is_ok_sequential: true, calculation_time_sequential: 12.4s
39 //
40 // mpfr_float:
41 // result_is_ok_concurrent: true, calculation_time_concurrent: 0.6s
42 // result_is_ok_sequential: true, calculation_time_sequential: 1.9s
43
#include <algorithm>
#include <array>
#include <atomic>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <limits>
#include <thread>
#include <vector>

#include <boost/math/constants/constants.hpp>
#include <boost/math/special_functions/prime.hpp>
56
57 #define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT 101
58 #define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT 102
59 #define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT 103
60 #define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT 104
61
62 #if !defined(BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE)
63 #define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT
64 //#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT
65 //#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT
66 //#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT
67 #endif
68
69 #if (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT)
70 #include <boost/multiprecision/cpp_dec_float.hpp>
71
72 using big_float_type = boost::multiprecision::number<boost::multiprecision::cpp_dec_float<501>,
73 boost::multiprecision::et_off>;
74
75 #elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT)
76 #include <boost/multiprecision/cpp_bin_float.hpp>
77
78 using big_float_type = boost::multiprecision::number<boost::multiprecision::cpp_bin_float<501>,
79 boost::multiprecision::et_off>;
80
81 #elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT)
82 #include <boost/multiprecision/gmp.hpp>
83
84 using big_float_type = boost::multiprecision::number<boost::multiprecision::gmp_float<501>,
85 boost::multiprecision::et_off>;
86
87 #elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT)
88 #include <boost/multiprecision/mpfr.hpp>
89
90 using big_float_type = boost::multiprecision::number<boost::multiprecision::mpfr_float_backend<501>,
91 boost::multiprecision::et_off>;
92
93 #else
94 #error BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE is undefined.
95 #endif
96
97 namespace boost { namespace multiprecision { namespace exercise_threading {
98
99 namespace detail {
100
namespace my_concurrency {
// Distribute the half-open index range [start, end) over a small pool
// of std::thread workers, invoking parallel_function(i) exactly once
// for every index. The call blocks until all workers have joined.
//
// parallel_function must be safe to invoke concurrently from multiple
// threads (any shared state it touches needs its own synchronization).
template<typename index_type,
         typename callable_function_type>
void parallel_for(index_type start,
                  index_type end,
                  callable_function_type parallel_function)
{
  // Estimate the number of threads available.
  static const unsigned int number_of_threads_hint =
    std::thread::hardware_concurrency();

  // hardware_concurrency() may legitimately return 0; fall back to 4.
  static const unsigned int number_of_threads_total =
    ((number_of_threads_hint == 0U) ? 4U : number_of_threads_hint);

  // Use 7/8 of the available cores, keeping roughly one eighth in reserve.
  static const unsigned int number_of_threads = number_of_threads_total - (number_of_threads_total / 8U);

  std::cout << "Executing with " << number_of_threads << " threads" << std::endl;

  // Set the size of a slice for the range functions.
  // The range is half-open, so it contains exactly (end - start) indices.
  index_type n = index_type(end - start);

  index_type slice =
    static_cast<index_type>(std::round(n / static_cast<float>(number_of_threads)));

  slice = (std::max)(slice, index_type(1));

  // Inner loop: each worker runs the callable over its own sub-range.
  auto launch_range =
    [&parallel_function](index_type index_lo, index_type index_hi)
    {
      for(index_type i = index_lo; i < index_hi; ++i)
      {
        parallel_function(i);
      }
    };

  // Create the thread pool and launch the jobs.
  std::vector<std::thread> pool;

  pool.reserve(number_of_threads);

  index_type i1 = start;
  index_type i2 = (std::min)(index_type(start + slice), end);

  // Launch at most (number_of_threads - 1) sliced jobs here; the final
  // remainder slice (if any) is launched below so that rounding in the
  // slice size can never drop indices.
  for(index_type i = 0U; ((index_type(i + index_type(1U)) < number_of_threads) && (i1 < end)); ++i)
  {
    pool.emplace_back(launch_range, i1, i2);

    i1 = i2;

    i2 = (std::min)(index_type(i2 + slice), end);
  }

  // Pick up any remaining tail of the range.
  if(i1 < end)
  {
    pool.emplace_back(launch_range, i1, end);
  }

  // Wait for the jobs to finish.
  for(std::thread& thread_in_pool : pool)
  {
    if(thread_in_pool.joinable())
    {
      thread_in_pool.join();
    }
  }
}
} // namespace my_concurrency
170
// Compute (b ^ p) for a non-negative integral power p using binary
// exponentiation (square-and-multiply). Small powers (0, 1, 2) are
// handled directly without entering the bit loop.
template<typename FloatingPointType,
         typename UnsignedIntegralType>
FloatingPointType pown(const FloatingPointType& b, const UnsignedIntegralType& p)
{
  using floating_type = FloatingPointType;
  using exponent_type = UnsignedIntegralType;

  // Trivial exponents: b^0 = 1 and b^1 = b.
  if(p == exponent_type(0U))
  {
    return floating_type(1U);
  }

  if(p == exponent_type(1U))
  {
    return b;
  }

  // b^2 as a single in-place multiply.
  if(p == exponent_type(2U))
  {
    floating_type b_squared(b);
    b_squared *= b;
    return b_squared;
  }

  // General case: scan the bits of p from least to most significant,
  // squaring the running term each step and multiplying it into the
  // accumulator whenever the current bit is set.
  floating_type accumulator(1U);
  floating_type square_term(b);

  for(exponent_type remaining_bits(p); remaining_bits != exponent_type(0U); remaining_bits >>= 1U)
  {
    if((static_cast<unsigned>(remaining_bits) & 1U) != 0U)
    {
      accumulator *= square_term;
    }

    square_term *= square_term;
  }

  return accumulator;
}
204
205 const std::vector<std::uint32_t>& primes()
206 {
207 static const std::vector<std::uint32_t> my_primes =
208 []() -> std::vector<std::uint32_t>
209 {
210 std::vector<std::uint32_t> local_primes(10000U);
211
212 // Get exactly 10,000 primes.
213 for(auto i = 0U; i < local_primes.size(); ++i)
214 {
215 local_primes[i] = boost::math::prime(i);
216 }
217
218 return local_primes;
219 }();
220
221 return my_primes;
222 }
223
224 } // namespace detail
225
// Compute log(x) "from the ground up" with an AGM (arithmetic-geometric
// mean) iteration, using the identity
//   log(x) = pi / [2 * AGM(1, 4 / (x * 2^m))] - m * ln(2),
// where m is chosen large enough for the target precision.
// Not optimized for high performance; intended as a worked AGM example.
template<typename FloatingPointType>
FloatingPointType log(const FloatingPointType& x)
{
  // Use an AGM method to compute the logarithm of x.

  // For values less than 1 invert the argument and
  // remember (in this case) to negate the result below,
  // since log(1/x) = -log(x).
  const bool b_negate = (x < 1);

  const FloatingPointType xx = ((b_negate == false) ? x : 1 / x);

  // Set a0 = 1
  // Set b0 = 4 / (x * 2^m) = 1 / (x * 2^(m - 2))

  FloatingPointType ak(1U);

  // Choose m from the requested decimal precision (scaled by ~1.67 to
  // convert digits10 into the needed binary magnitude), reduced by the
  // base-2 size of the argument so that (x * 2^m) stays large enough.
  const float n_times_factor = static_cast<float>(static_cast<float>(std::numeric_limits<FloatingPointType>::digits10) * 1.67F);
  const float lgx_over_lg2 = std::log(static_cast<float>(xx)) / std::log(2.0F);

  std::int32_t m = static_cast<std::int32_t>(n_times_factor - lgx_over_lg2);

  // Ensure that the resulting power is non-negative.
  // Also enforce that m >= 8.
  m = (std::max)(m, static_cast<std::int32_t>(8));

  FloatingPointType bk = detail::pown(FloatingPointType(2), static_cast<std::uint32_t>(m));

  bk *= xx;
  bk = 4 / bk;

  FloatingPointType ak_tmp(0U);

  using std::sqrt;

  // Determine the requested precision of the upcoming iteration in units of digits10.
  // sqrt(epsilon)/100 targets slightly more than half of the full digits;
  // AGM convergence is quadratic, so one further iteration then suffices.
  const FloatingPointType target_tolerance = sqrt(std::numeric_limits<FloatingPointType>::epsilon()) / 100;

  // Hard iteration cap of 64 guards against non-convergence.
  for(std::int32_t k = static_cast<std::int32_t>(0); k < static_cast<std::int32_t>(64); ++k)
  {
    using std::fabs;

    // Check for the number of significant digits to be
    // at least half of the requested digits. If at least
    // half of the requested digits have been achieved,
    // then break after the upcoming iteration.
    const bool break_after_this_iteration = ( (k > static_cast<std::int32_t>(4))
                                             && (fabs(1 - fabs(ak / bk)) < target_tolerance));

    // AGM step: a_{k+1} = (a_k + b_k) / 2, b_{k+1} = sqrt(a_k * b_k).
    ak_tmp = ak;
    ak += bk;
    ak /= 2;

    if(break_after_this_iteration)
    {
      break;
    }

    bk *= ak_tmp;
    bk = sqrt(bk);
  }

  // We are now finished with the AGM iteration for log(x).

  // Compute log(x) = {pi / [2 * AGM(1, 4 / 2^m)]} - (m * ln2)
  // Note at this time that (ak = bk) = AGM(...)

  // Retrieve the value of pi, divide by (2 * a) and subtract (m * ln2).
  const FloatingPointType result =
    boost::math::constants::pi<FloatingPointType>() / (ak * 2)
    - (boost::math::constants::ln_two<FloatingPointType>() * m);

  // Undo the inversion performed for arguments below 1.
  return ((b_negate == true) ? -result : result);
}
299
300 } } } // namespace boost::multiprecision::exercise_threading
301
// Exercise the AGM logarithm concurrently: compute log(catalan + 1/p)
// for each of the first 10,000 primes p via parallel_for, compare each
// result against the backend's own log(), and report the agreement.
// On return, calculation_time holds the measured duration in seconds.
template<typename FloatingPointType>
bool log_agm_concurrent(float& calculation_time)
{
  const std::size_t count = boost::multiprecision::exercise_threading::detail::primes().size();

  std::vector<FloatingPointType> log_results(count);
  std::vector<FloatingPointType> log_control(count);

  // Spin-lock protecting the shared progress counter and std::cout below.
  std::atomic_flag log_agm_lock = ATOMIC_FLAG_INIT;

  std::size_t concurrent_log_agm_count = 0U;

  // NOTE(review): std::clock() measures CPU time on POSIX (summed over
  // threads) but wall time on some platforms -- confirm which is intended.
  const std::clock_t start = std::clock();

  boost::multiprecision::exercise_threading::detail::my_concurrency::parallel_for
  (
    std::size_t(0U),
    log_results.size(),
    [&log_results, &log_control, &concurrent_log_agm_count, &log_agm_lock](std::size_t i)
    {
      // Busy-wait until the lock is acquired, then update/report progress.
      while(log_agm_lock.test_and_set()) { ; }
      ++concurrent_log_agm_count;
      if((concurrent_log_agm_count % 100U) == 0U)
      {
        std::cout << "log agm concurrent at index "
                  << concurrent_log_agm_count
                  << " of "
                  << log_results.size()
                  << ". Total processed so far: "
                  << std::fixed
                  << std::setprecision(1)
                  << (100.0F * float(concurrent_log_agm_count)) / float(log_results.size())
                  << "%."
                  << "\r";
      }
      log_agm_lock.clear();

      // Argument: Catalan's constant perturbed by the reciprocal prime,
      // giving a distinct non-trivial input per index.
      const FloatingPointType dx = (FloatingPointType(1U) / (boost::multiprecision::exercise_threading::detail::primes()[i]));
      const FloatingPointType x = boost::math::constants::catalan<FloatingPointType>() + dx;

      // AGM result versus the backend's built-in log as the control value.
      // Writes to distinct indices i, so no locking is needed here.
      const FloatingPointType lr = boost::multiprecision::exercise_threading::log(x);
      const FloatingPointType lc = boost::multiprecision::log(x);

      log_results[i] = lr;
      log_control[i] = lc;
    }
  );

  calculation_time = static_cast<float>(std::clock() - start) / static_cast<float>(CLOCKS_PER_SEC);

  std::cout << std::endl;

  std::cout << "Checking results concurrent: ";

  bool result_is_ok = true;

  // Accept results whose relative deviation from the control value is
  // below 10^6 * epsilon (i.e., agreement to within ~6 lost digits).
  for(std::size_t i = 0U; i < log_results.size(); ++i)
  {
    using std::fabs;

    const FloatingPointType close_fraction = fabs(1 - (log_results[i] / log_control[i]));

    result_is_ok &= (close_fraction < std::numeric_limits<FloatingPointType>::epsilon() * 1000000U);
  }

  std::cout << std::boolalpha << result_is_ok << std::endl;

  return result_is_ok;
}
371
372 template<typename FloatingPointType>
373 bool log_agm_sequential(float& calculation_time)
374 {
375 const std::size_t count = boost::multiprecision::exercise_threading::detail::primes().size();
376
377 std::vector<FloatingPointType> log_results(count);
378 std::vector<FloatingPointType> log_control(count);
379
380 std::atomic_flag log_agm_lock = ATOMIC_FLAG_INIT;
381
382 const std::clock_t start = std::clock();
383
384 for(std::size_t i = 0U; i < log_results.size(); ++i)
385 {
386 const std::size_t sequential_log_agm_count = i + 1U;
387
388 if((sequential_log_agm_count % 100U) == 0U)
389 {
390 std::cout << "log agm sequential at index "
391 << sequential_log_agm_count
392 << " of "
393 << log_results.size()
394 << ". Total processed so far: "
395 << std::fixed
396 << std::setprecision(1)
397 << (100.0F * float(sequential_log_agm_count)) / float(log_results.size())
398 << "%."
399 << "\r";
400 }
401
402 const FloatingPointType dx = (FloatingPointType(1U) / (boost::multiprecision::exercise_threading::detail::primes()[i]));
403 const FloatingPointType x = boost::math::constants::catalan<FloatingPointType>() + dx;
404
405 log_results[i] = boost::multiprecision::exercise_threading::log(x);
406 log_control[i] = boost::multiprecision::log(x);
407 }
408
409 calculation_time = static_cast<float>(std::clock() - start) / static_cast<float>(CLOCKS_PER_SEC);
410
411 std::cout << std::endl;
412
413 std::cout << "Checking results sequential: ";
414
415 bool result_is_ok = true;
416
417 for(std::size_t i = 0U; i < log_results.size(); ++i)
418 {
419 using std::fabs;
420
421 const FloatingPointType close_fraction = fabs(1 - (log_results[i] / log_control[i]));
422
423 result_is_ok &= (close_fraction < std::numeric_limits<FloatingPointType>::epsilon() * 1000000U);
424 }
425
426 std::cout << std::boolalpha << result_is_ok << std::endl;
427
428 return result_is_ok;
429 }
430
431 int main()
432 {
433 std::cout << "Calculating "
434 << boost::multiprecision::exercise_threading::detail::primes().size()
435 << " primes"
436 << std::endl;
437
438 float calculation_time_concurrent;
439 const bool result_is_ok_concurrent = log_agm_concurrent<big_float_type>(calculation_time_concurrent);
440
441 float calculation_time_sequential;
442 const bool result_is_ok_sequential = log_agm_sequential<big_float_type>(calculation_time_sequential);
443
444 std::cout << std::endl;
445
446 std::cout << "result_is_ok_concurrent: "
447 << std::boolalpha
448 << result_is_ok_concurrent
449 << ", calculation_time_concurrent: "
450 << std::fixed
451 << std::setprecision(1)
452 << calculation_time_concurrent
453 << "s"
454 << std::endl;
455
456 std::cout << "result_is_ok_sequential: "
457 << std::boolalpha
458 << result_is_ok_sequential
459 << ", calculation_time_sequential: "
460 << std::fixed
461 << std::setprecision(1)
462 << calculation_time_sequential
463 << "s"
464 << std::endl;
465 }