// ceph/src/boost/libs/multiprecision/example/exercise_threading_log_agm.cpp
// (Boost.Multiprecision example as carried in the ceph source tree;
//  imported with the "update ceph source to reef 18.1.2" change.)
1 ///////////////////////////////////////////////////////////////////////////////
2 // Copyright Christopher Kormanyos 2020 - 2021.
3 // Distributed under the Boost Software License, Version 1.0.
4 // (See accompanying file LICENSE_1_0.txt or copy at
5 // http://www.boost.org/LICENSE_1_0.txt)
6 //
7
8 // This example exercises Boost.Multiprecision in concurrent
9 // multi-threaded environments. To do so a loop involving
10 // non-trivial calculations of numerous function values
11 // has been set up within both concurrent as well as
12 // sequential running environments. In particular,
13 // this example uses an AGM method to do a "from the ground up"
14 // calculation of logarithms. The logarithm functions values
15 // are compared with the values from Boost.Multiprecision's
16 // specific log functions for the relevant backends.
17 // The log GM here is not optimized or intended for
18 // high-performance work, but can be taken as an
19 // interesting example of an AGM iteration if helpful.
20
21 // This example has been initially motivated in part
22 // by discussions in:
23 // https://github.com/boostorg/multiprecision/pull/211
24
25 // We find the following performance data here:
26 // https://github.com/boostorg/multiprecision/pull/213
27 //
28 // cpp_dec_float:
29 // result_is_ok_concurrent: true, calculation_time_concurrent: 18.1s
30 // result_is_ok_sequential: true, calculation_time_sequential: 48.5s
31 //
32 // cpp_bin_float:
33 // result_is_ok_concurrent: true, calculation_time_concurrent: 18.7s
34 // result_is_ok_sequential: true, calculation_time_sequential: 50.4s
35 //
36 // gmp_float:
37 // result_is_ok_concurrent: true, calculation_time_concurrent: 3.3s
38 // result_is_ok_sequential: true, calculation_time_sequential: 12.4s
39 //
40 // mpfr_float:
41 // result_is_ok_concurrent: true, calculation_time_concurrent: 0.6s
42 // result_is_ok_sequential: true, calculation_time_sequential: 1.9s
43
#include <algorithm>
#include <array>
#include <atomic>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <limits>
#include <thread>
#include <vector>

#include <boost/math/constants/constants.hpp>
#include <boost/math/special_functions/prime.hpp>
56
57 #define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT 101
58 #define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT 102
59 #define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT 103
60 #define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT 104
61
62 #if !defined(BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE)
63 #define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT
64 //#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT
65 //#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT
66 //#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT
67 #endif
68
69 #if (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT)
70 #include <boost/multiprecision/cpp_dec_float.hpp>
71
72 using big_float_type = boost::multiprecision::number<boost::multiprecision::cpp_dec_float<501>,
73 boost::multiprecision::et_off>;
74
75 #elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT)
76 #include <boost/multiprecision/cpp_bin_float.hpp>
77
78 using big_float_type = boost::multiprecision::number<boost::multiprecision::cpp_bin_float<501>,
79 boost::multiprecision::et_off>;
80
81 #elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT)
82 #include <boost/multiprecision/gmp.hpp>
83
84 using big_float_type = boost::multiprecision::number<boost::multiprecision::gmp_float<501>,
85 boost::multiprecision::et_off>;
86
87 #elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT)
88 #include <boost/multiprecision/mpfr.hpp>
89
90 using big_float_type = boost::multiprecision::number<boost::multiprecision::mpfr_float_backend<501>,
91 boost::multiprecision::et_off>;
92
93 #else
94 #error BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE is undefined.
95 #endif
96
97 namespace boost { namespace multiprecision { namespace exercise_threading {
98
99 namespace detail {
100
namespace my_concurrency {
template<typename index_type,
         typename callable_function_type>
void parallel_for(index_type start,
                  index_type end,
                  callable_function_type parallel_function)
{
  // Run parallel_function(i) for every i in the half-open
  // range [start, end), distributing the iterations over a
  // small pool of threads and joining them all before return.

  // Estimate the number of threads available.
  static const unsigned int number_of_threads_hint =
    std::thread::hardware_concurrency();

  static const unsigned int number_of_threads_total =
    ((number_of_threads_hint == 0U) ? 4U : number_of_threads_hint);

  // Use roughly 7/8 of the available cores, leaving a little
  // headroom for the rest of the system.
  static const unsigned int number_of_threads = number_of_threads_total - (number_of_threads_total / 8U);

  std::cout << "Executing with " << number_of_threads << " threads" << std::endl;

  // Set the size of a slice for the range functions.
  // The range is half-open, so it contains exactly (end - start) indices.
  index_type n = index_type(end - start);

  index_type slice =
    static_cast<index_type>(std::round(n / static_cast<float>(number_of_threads)));

  slice = (std::max)(slice, index_type(1));

  // Inner loop: process the half-open sub-range [index_lo, index_hi).
  auto launch_range =
    [&parallel_function](index_type index_lo, index_type index_hi)
    {
      for(index_type i = index_lo; i < index_hi; ++i)
      {
        parallel_function(i);
      }
    };

  // Create the thread pool and launch the jobs.
  std::vector<std::thread> pool;

  pool.reserve(number_of_threads);

  index_type i1 = start;
  index_type i2 = (std::min)(index_type(start + slice), end);

  for(index_type i = 0U; ((index_type(i + index_type(1U)) < number_of_threads) && (i1 < end)); ++i)
  {
    pool.emplace_back(launch_range, i1, i2);

    i1 = i2;

    i2 = (std::min)(index_type(i2 + slice), end);
  }

  // The final thread (if any work remains) takes the tail of the range,
  // so every index in [start, end) is covered exactly once.
  if(i1 < end)
  {
    pool.emplace_back(launch_range, i1, end);
  }

  // Wait for the jobs to finish.
  for(std::thread& thread_in_pool : pool)
  {
    if(thread_in_pool.joinable())
    {
      thread_in_pool.join();
    }
  }
}
} // namespace my_concurrency
170
template<typename FloatingPointType,
         typename UnsignedIntegralType>
FloatingPointType pown(const FloatingPointType& b, const UnsignedIntegralType& p)
{
  // Compute (b ^ p) via right-to-left binary exponentiation
  // (square-and-multiply over the bits of the exponent).

  using floating_type = FloatingPointType;
  using exponent_type = UnsignedIntegralType;

  // Handle the small exponents 0, 1 and 2 directly.
  if(p == exponent_type(0U))
  {
    return floating_type(1U);
  }

  if(p == exponent_type(1U))
  {
    return b;
  }

  if(p == exponent_type(2U))
  {
    floating_type b_squared(b);
    b_squared *= b;
    return b_squared;
  }

  // General case: accumulate b^(2^k) for each set bit k of p.
  floating_type accumulator(1U);
  floating_type power_of_b(b);

  exponent_type remaining_bits(p);

  while(remaining_bits != exponent_type(0U))
  {
    // Only the lowest bit matters here, so narrowing is safe.
    const bool low_bit_is_set = ((static_cast<unsigned>(remaining_bits) & 1U) != 0U);

    if(low_bit_is_set)
    {
      accumulator *= power_of_b;
    }

    power_of_b *= power_of_b;

    remaining_bits >>= 1U;
  }

  return accumulator;
}
204
205 const std::vector<std::uint32_t>& primes()
206 {
207 static std::vector<std::uint32_t> my_primes;
208
209 if(my_primes.empty())
210 {
211 my_primes.resize(10000U);
212
213 // Get exactly 10,000 primes.
214 for(std::size_t i = 0U; i < my_primes.size(); ++i)
215 {
216 my_primes[i] = boost::math::prime((unsigned int) i);
217 }
218 }
219
220 return my_primes;
221 }
222
223 } // namespace detail
224
template<typename FloatingPointType>
FloatingPointType log(const FloatingPointType& x)
{
  // Use an AGM method to compute the logarithm of x.
  //
  // The identity used below is
  //   log(x) ~= {pi / [2 * AGM(1, 4 / (x * 2^m))]} - (m * ln2),
  // which becomes accurate when (x * 2^m) is sufficiently large.
  // This routine is illustrative, not performance-optimized.

  // For values less than 1 invert the argument and
  // remember (in this case) to negate the result below,
  // since log(1/x) = -log(x).
  const bool b_negate = (x < 1);

  const FloatingPointType xx = ((b_negate == false) ? x : 1 / x);

  // Set a0 = 1
  // Set b0 = 4 / (x * 2^m) = 1 / (x * 2^(m - 2))

  FloatingPointType ak(1U);

  // Choose the shift m from the target precision (the 1.67 factor
  // converts digits10 towards a bit count; presumably ~log2(10)/2 —
  // TODO(review): confirm the intended scaling), reduced by the
  // number of binary digits already present in xx.
  const float n_times_factor = static_cast<float>(static_cast<float>(std::numeric_limits<FloatingPointType>::digits10) * 1.67F);
  const float lgx_over_lg2 = std::log(static_cast<float>(xx)) / std::log(2.0F);

  std::int32_t m = static_cast<std::int32_t>(n_times_factor - lgx_over_lg2);

  // Ensure that the resulting power is non-negative.
  // Also enforce that m >= 8.
  m = (std::max)(m, static_cast<std::int32_t>(8));

  // bk = 4 / (xx * 2^m), formed in steps to stay in the big type.
  FloatingPointType bk = detail::pown(FloatingPointType(2), static_cast<std::uint32_t>(m));

  bk *= xx;
  bk = 4 / bk;

  FloatingPointType ak_tmp(0U);

  using std::sqrt;

  // Determine the requested precision of the upcoming iteration in units of digits10.
  // Convergence is quadratic, so agreement to half the digits on one pass
  // implies (nearly) full agreement after one more pass.
  const FloatingPointType target_tolerance = sqrt(std::numeric_limits<FloatingPointType>::epsilon()) / 100;

  // AGM loop: a_{k+1} = (a_k + b_k)/2, b_{k+1} = sqrt(a_k * b_k).
  // The iteration cap of 64 is a safety bound; the break below fires first.
  for(std::int32_t k = static_cast<std::int32_t>(0); k < static_cast<std::int32_t>(64); ++k)
  {
    using std::fabs;

    // Check for the number of significant digits to be
    // at least half of the requested digits. If at least
    // half of the requested digits have been achieved,
    // then break after the upcoming iteration.
    const bool break_after_this_iteration = (   (k > static_cast<std::int32_t>(4))
                                             && (fabs(1 - fabs(ak / bk)) < target_tolerance));

    ak_tmp = ak;
    ak += bk;
    ak /= 2;

    // Break *after* updating ak so that ak holds the final mean,
    // but before touching bk (bk is no longer needed).
    if(break_after_this_iteration)
    {
      break;
    }

    bk *= ak_tmp;
    bk = sqrt(bk);
  }

  // We are now finished with the AGM iteration for log(x).

  // Compute log(x) = {pi / [2 * AGM(1, 4 / 2^m)]} - (m * ln2)
  // Note at this time that (ak = bk) = AGM(...)

  // Retrieve the value of pi, divide by (2 * a) and subtract (m * ln2).
  const FloatingPointType result =
    boost::math::constants::pi<FloatingPointType>() / (ak * 2)
    - (boost::math::constants::ln_two<FloatingPointType>() * m);

  // Undo the initial inversion for arguments below 1.
  return ((b_negate == true) ? -result : result);
}
298
299 } } } // namespace boost::multiprecision::exercise_threading
300
301 template<typename FloatingPointType>
302 bool log_agm_concurrent(float& calculation_time)
303 {
304 const std::size_t count = boost::multiprecision::exercise_threading::detail::primes().size();
305
306 std::vector<FloatingPointType> log_results(count);
307 std::vector<FloatingPointType> log_control(count);
308
309 std::atomic_flag log_agm_lock = ATOMIC_FLAG_INIT;
310
311 std::size_t concurrent_log_agm_count = 0U;
312
313 const std::clock_t start = std::clock();
314
315 boost::multiprecision::exercise_threading::detail::my_concurrency::parallel_for
316 (
317 std::size_t(0U),
318 log_results.size(),
319 [&log_results, &log_control, &concurrent_log_agm_count, &log_agm_lock](std::size_t i)
320 {
321 while(log_agm_lock.test_and_set()) { ; }
322 const FloatingPointType dx = (FloatingPointType(1U) / (boost::multiprecision::exercise_threading::detail::primes()[i]));
323 log_agm_lock.clear();
324
325 const FloatingPointType x = boost::math::constants::catalan<FloatingPointType>() + dx;
326
327 const FloatingPointType lr = boost::multiprecision::exercise_threading::log(x);
328 const FloatingPointType lc = boost::multiprecision::log(x);
329
330 while(log_agm_lock.test_and_set()) { ; }
331
332 log_results[i] = lr;
333 log_control[i] = lc;
334
335 ++concurrent_log_agm_count;
336
337 if((concurrent_log_agm_count % 100U) == 0U)
338 {
339 std::cout << "log agm concurrent at index "
340 << concurrent_log_agm_count
341 << " of "
342 << log_results.size()
343 << ". Total processed so far: "
344 << std::fixed
345 << std::setprecision(1)
346 << (100.0F * float(concurrent_log_agm_count)) / float(log_results.size())
347 << "%."
348 << "\r";
349 }
350
351 log_agm_lock.clear();
352 }
353 );
354
355 calculation_time = static_cast<float>(std::clock() - start) / static_cast<float>(CLOCKS_PER_SEC);
356
357 std::cout << std::endl;
358
359 std::cout << "Checking results concurrent: ";
360
361 bool result_is_ok = true;
362
363 const FloatingPointType tol = std::numeric_limits<FloatingPointType>::epsilon() * 1000000U;
364
365 for(std::size_t i = 0U; i < log_results.size(); ++i)
366 {
367 using std::fabs;
368
369 const FloatingPointType close_fraction = fabs(1 - (log_results[i] / log_control[i]));
370
371 result_is_ok &= (close_fraction < tol);
372 }
373
374 std::cout << std::boolalpha << result_is_ok << std::endl;
375
376 return result_is_ok;
377 }
378
379 template<typename FloatingPointType>
380 bool log_agm_sequential(float& calculation_time)
381 {
382 const std::size_t count = boost::multiprecision::exercise_threading::detail::primes().size();
383
384 std::vector<FloatingPointType> log_results(count);
385 std::vector<FloatingPointType> log_control(count);
386
387 const std::clock_t start = std::clock();
388
389 for(std::size_t i = 0U; i < log_results.size(); ++i)
390 {
391 const std::size_t sequential_log_agm_count = i + 1U;
392
393 const FloatingPointType dx = (FloatingPointType(1U) / (boost::multiprecision::exercise_threading::detail::primes()[i]));
394 const FloatingPointType x = boost::math::constants::catalan<FloatingPointType>() + dx;
395
396 log_results[i] = boost::multiprecision::exercise_threading::log(x);
397 log_control[i] = boost::multiprecision::log(x);
398
399 if((sequential_log_agm_count % 100U) == 0U)
400 {
401 std::cout << "log agm sequential at index "
402 << sequential_log_agm_count
403 << " of "
404 << log_results.size()
405 << ". Total processed so far: "
406 << std::fixed
407 << std::setprecision(1)
408 << (100.0F * float(sequential_log_agm_count)) / float(log_results.size())
409 << "%."
410 << "\r";
411 }
412 }
413
414 calculation_time = static_cast<float>(std::clock() - start) / static_cast<float>(CLOCKS_PER_SEC);
415
416 std::cout << std::endl;
417
418 std::cout << "Checking results sequential: ";
419
420 bool result_is_ok = true;
421
422 const FloatingPointType tol = std::numeric_limits<FloatingPointType>::epsilon() * 1000000U;
423
424 for(std::size_t i = 0U; i < log_results.size(); ++i)
425 {
426 using std::fabs;
427
428 const FloatingPointType close_fraction = fabs(1 - (log_results[i] / log_control[i]));
429
430 result_is_ok &= (close_fraction < tol);
431 }
432
433 std::cout << std::boolalpha << result_is_ok << std::endl;
434
435 return result_is_ok;
436 }
437
438 int main()
439 {
440 std::cout << "Calculating "
441 << boost::multiprecision::exercise_threading::detail::primes().size()
442 << " primes"
443 << std::endl;
444
445 float calculation_time_concurrent;
446 const bool result_is_ok_concurrent = log_agm_concurrent<big_float_type>(calculation_time_concurrent);
447
448 float calculation_time_sequential;
449 const bool result_is_ok_sequential = log_agm_sequential<big_float_type>(calculation_time_sequential);
450
451 std::cout << std::endl;
452
453 std::cout << "result_is_ok_concurrent: "
454 << std::boolalpha
455 << result_is_ok_concurrent
456 << ", calculation_time_concurrent: "
457 << std::fixed
458 << std::setprecision(1)
459 << calculation_time_concurrent
460 << "s"
461 << std::endl;
462
463 std::cout << "result_is_ok_sequential: "
464 << std::boolalpha
465 << result_is_ok_sequential
466 << ", calculation_time_sequential: "
467 << std::fixed
468 << std::setprecision(1)
469 << calculation_time_sequential
470 << "s"
471 << std::endl;
472 }