///////////////////////////////////////////////////////////////////////////////
// Copyright Christopher Kormanyos 2020 - 2021.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//
8// This example exercises Boost.Multiprecision in concurrent
9// multi-threaded environments. To do so a loop involving
10// non-trivial calculations of numerous function values
11// has been set up within both concurrent as well as
12// sequential running environments. In particular,
13// this example uses an AGM method to do a "from the ground up"
14// calculation of logarithms. The logarithm functions values
15// are compared with the values from Boost.Multiprecision's
16// specific log functions for the relevant backends.
17// The log GM here is not optimized or intended for
18// high-performance work, but can be taken as an
19// interesting example of an AGM iteration if helpful.
20
21// This example has been initially motivated in part
22// by discussions in:
23// https://github.com/boostorg/multiprecision/pull/211
24
25// We find the following performance data here:
26// https://github.com/boostorg/multiprecision/pull/213
27//
28// cpp_dec_float:
29// result_is_ok_concurrent: true, calculation_time_concurrent: 18.1s
30// result_is_ok_sequential: true, calculation_time_sequential: 48.5s
31//
32// cpp_bin_float:
33// result_is_ok_concurrent: true, calculation_time_concurrent: 18.7s
34// result_is_ok_sequential: true, calculation_time_sequential: 50.4s
35//
36// gmp_float:
37// result_is_ok_concurrent: true, calculation_time_concurrent: 3.3s
38// result_is_ok_sequential: true, calculation_time_sequential: 12.4s
39//
40// mpfr_float:
41// result_is_ok_concurrent: true, calculation_time_concurrent: 0.6s
42// result_is_ok_sequential: true, calculation_time_sequential: 1.9s
43
#include <algorithm>
#include <array>
#include <atomic>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <limits>
#include <thread>
#include <vector>
53
54#include <boost/math/constants/constants.hpp>
55#include <boost/math/special_functions/prime.hpp>
56
57#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT 101
58#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT 102
59#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT 103
60#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT 104
61
62#if !defined(BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE)
63#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT
64//#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT
65//#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT
66//#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT
67#endif
68
69#if (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT)
70#include <boost/multiprecision/cpp_dec_float.hpp>
71
72using big_float_type = boost::multiprecision::number<boost::multiprecision::cpp_dec_float<501>,
73 boost::multiprecision::et_off>;
74
75#elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT)
76#include <boost/multiprecision/cpp_bin_float.hpp>
77
78using big_float_type = boost::multiprecision::number<boost::multiprecision::cpp_bin_float<501>,
79 boost::multiprecision::et_off>;
80
81#elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT)
82#include <boost/multiprecision/gmp.hpp>
83
84using big_float_type = boost::multiprecision::number<boost::multiprecision::gmp_float<501>,
85 boost::multiprecision::et_off>;
86
87#elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT)
88#include <boost/multiprecision/mpfr.hpp>
89
90using big_float_type = boost::multiprecision::number<boost::multiprecision::mpfr_float_backend<501>,
91 boost::multiprecision::et_off>;
92
93#else
94#error BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE is undefined.
95#endif
96
97namespace boost { namespace multiprecision { namespace exercise_threading {
98
99namespace detail {
100
101namespace my_concurrency {
template<typename index_type,
         typename callable_function_type>
void parallel_for(index_type start,
                  index_type end,
                  callable_function_type parallel_function)
{
   // Apply parallel_function(i) for every i in the half-open
   // range [start, end), distributing contiguous slices of the
   // range over a one-shot pool of std::thread objects. All
   // threads are joined before this function returns.

   // Estimate the number of threads available.
   static const unsigned int number_of_threads_hint =
      std::thread::hardware_concurrency();

   static const unsigned int number_of_threads_total =
      ((number_of_threads_hint == 0U) ? 4U : number_of_threads_hint);

   // Use roughly 7/8 of the available cores, leaving a little
   // headroom for the rest of the system.
   static const unsigned int number_of_threads = number_of_threads_total - (number_of_threads_total / 8U);

   std::cout << "Executing with " << number_of_threads << " threads" << std::endl;

   // Set the size of a slice for the range functions. The range
   // [start, end) is half-open, so it contains exactly
   // (end - start) indices. (The previous formula added 1 here,
   // which slightly over-sized each slice and could leave the
   // remainder thread idle.)
   index_type n = index_type(end - start);

   index_type slice =
      static_cast<index_type>(std::round(n / static_cast<float>(number_of_threads)));

   slice = (std::max)(slice, index_type(1));

   // Inner loop: run the function over one contiguous sub-range.
   auto launch_range =
      [&parallel_function](index_type index_lo, index_type index_hi)
      {
         for(index_type i = index_lo; i < index_hi; ++i)
         {
            parallel_function(i);
         }
      };

   // Create the thread pool and launch the jobs.
   std::vector<std::thread> pool;

   pool.reserve(number_of_threads);

   index_type i1 = start;
   index_type i2 = (std::min)(index_type(start + slice), end);

   for(index_type i = 0U; ((index_type(i + index_type(1U)) < number_of_threads) && (i1 < end)); ++i)
   {
      pool.emplace_back(launch_range, i1, i2);

      i1 = i2;

      i2 = (std::min)(index_type(i2 + slice), end);
   }

   // The final thread picks up any remainder of the range.
   if(i1 < end)
   {
      pool.emplace_back(launch_range, i1, end);
   }

   // Wait for the jobs to finish.
   for(std::thread& thread_in_pool : pool)
   {
      if(thread_in_pool.joinable())
      {
         thread_in_pool.join();
      }
   }
}
169} // namespace my_concurrency
170
template<typename FloatingPointType,
         typename UnsignedIntegralType>
FloatingPointType pown(const FloatingPointType& b, const UnsignedIntegralType& p)
{
   // Compute (b ^ p) for an unsigned integral exponent p via
   // binary (square-and-multiply) exponentiation.

   using floating_type = FloatingPointType;
   using exponent_type = UnsignedIntegralType;

   // Dispatch the small exponents directly.
   if(p == exponent_type(0U)) { return floating_type(1U); }
   if(p == exponent_type(1U)) { return b; }

   if(p == exponent_type(2U))
   {
      floating_type b_squared(b);
      b_squared *= b;
      return b_squared;
   }

   // General case: scan the bits of p from least to most
   // significant, multiplying in the running square whenever
   // the corresponding bit is set.
   floating_type accumulator(1U);
   floating_type squared_term(b);

   exponent_type remaining(p);

   while(remaining != exponent_type(0U))
   {
      if((static_cast<unsigned>(remaining) & 1U) != 0U)
      {
         accumulator *= squared_term;
      }

      squared_term *= squared_term;

      remaining >>= 1U;
   }

   return accumulator;
}
204
205const std::vector<std::uint32_t>& primes()
206{
1e59de90
TL
207 static std::vector<std::uint32_t> my_primes;
208
209 if(my_primes.empty())
20effc67 210 {
1e59de90 211 my_primes.resize(10000U);
20effc67
TL
212
213 // Get exactly 10,000 primes.
1e59de90 214 for(std::size_t i = 0U; i < my_primes.size(); ++i)
20effc67 215 {
1e59de90 216 my_primes[i] = boost::math::prime((unsigned int) i);
20effc67 217 }
1e59de90 218 }
20effc67
TL
219
220 return my_primes;
221}
222
223} // namespace detail
224
template<typename FloatingPointType>
FloatingPointType log(const FloatingPointType& x)
{
   // Compute log(x) "from the ground up" with an arithmetic-
   // geometric-mean (AGM) iteration, via the identity
   //   log(x) ~= pi / [2 * AGM(1, 4 / (x * 2^m))] - m * ln(2),
   // where m is chosen so that (x * 2^m) is large enough for the
   // approximation to hold at the target precision. This routine
   // is intentionally not optimized for performance (see the file
   // header comments).

   // Use an AGM method to compute the logarithm of x.

   // For values less than 1 invert the argument and
   // remember (in this case) to negate the result below.
   const bool b_negate = (x < 1);

   const FloatingPointType xx = ((b_negate == false) ? x : 1 / x);

   // Set a0 = 1
   // Set b0 = 4 / (x * 2^m) = 1 / (x * 2^(m - 2))

   FloatingPointType ak(1U);

   // Estimate m from the requested decimal precision and a rough
   // base-2 logarithm of xx. A low-precision float estimate is
   // sufficient here, since m only needs to be large enough.
   const float n_times_factor = static_cast<float>(static_cast<float>(std::numeric_limits<FloatingPointType>::digits10) * 1.67F);
   const float lgx_over_lg2 = std::log(static_cast<float>(xx)) / std::log(2.0F);

   std::int32_t m = static_cast<std::int32_t>(n_times_factor - lgx_over_lg2);

   // Ensure that the resulting power is non-negative.
   // Also enforce that m >= 8.
   m = (std::max)(m, static_cast<std::int32_t>(8));

   // b0 = 4 / (xx * 2^m), computed as bk = 4 / (2^m * xx).
   FloatingPointType bk = detail::pown(FloatingPointType(2), static_cast<std::uint32_t>(m));

   bk *= xx;
   bk = 4 / bk;

   FloatingPointType ak_tmp(0U);

   using std::sqrt;

   // Determine the requested precision of the upcoming iteration in units of digits10.
   const FloatingPointType target_tolerance = sqrt(std::numeric_limits<FloatingPointType>::epsilon()) / 100;

   // AGM loop: a_{k+1} = (a_k + b_k) / 2, b_{k+1} = sqrt(a_k * b_k).
   // 64 iterations is a hard safety cap; convergence is quadratic,
   // so the break below normally fires much earlier.
   for(std::int32_t k = static_cast<std::int32_t>(0); k < static_cast<std::int32_t>(64); ++k)
   {
      using std::fabs;

      // Check for the number of significant digits to be
      // at least half of the requested digits. If at least
      // half of the requested digits have been achieved,
      // then break after the upcoming iteration.
      const bool break_after_this_iteration = ( (k > static_cast<std::int32_t>(4))
                                               && (fabs(1 - fabs(ak / bk)) < target_tolerance));

      ak_tmp = ak;
      ak += bk;
      ak /= 2;

      if(break_after_this_iteration)
      {
         break;
      }

      bk *= ak_tmp;
      bk = sqrt(bk);
   }

   // We are now finished with the AGM iteration for log(x).

   // Compute log(x) = {pi / [2 * AGM(1, 4 / 2^m)]} - (m * ln2)
   // Note at this time that (ak = bk) = AGM(...)

   // Retrieve the value of pi, divide by (2 * a) and subtract (m * ln2).
   const FloatingPointType result =
      boost::math::constants::pi<FloatingPointType>() / (ak * 2)
      - (boost::math::constants::ln_two<FloatingPointType>() * m);

   // Undo the inversion performed for arguments less than 1.
   return ((b_negate == true) ? -result : result);
}
298
299} } } // namespace boost::multiprecision::exercise_threading
300
301template<typename FloatingPointType>
302bool log_agm_concurrent(float& calculation_time)
303{
304 const std::size_t count = boost::multiprecision::exercise_threading::detail::primes().size();
305
306 std::vector<FloatingPointType> log_results(count);
307 std::vector<FloatingPointType> log_control(count);
308
309 std::atomic_flag log_agm_lock = ATOMIC_FLAG_INIT;
310
311 std::size_t concurrent_log_agm_count = 0U;
312
313 const std::clock_t start = std::clock();
314
315 boost::multiprecision::exercise_threading::detail::my_concurrency::parallel_for
316 (
317 std::size_t(0U),
318 log_results.size(),
319 [&log_results, &log_control, &concurrent_log_agm_count, &log_agm_lock](std::size_t i)
320 {
321 while(log_agm_lock.test_and_set()) { ; }
1e59de90
TL
322 const FloatingPointType dx = (FloatingPointType(1U) / (boost::multiprecision::exercise_threading::detail::primes()[i]));
323 log_agm_lock.clear();
324
325 const FloatingPointType x = boost::math::constants::catalan<FloatingPointType>() + dx;
326
327 const FloatingPointType lr = boost::multiprecision::exercise_threading::log(x);
328 const FloatingPointType lc = boost::multiprecision::log(x);
329
330 while(log_agm_lock.test_and_set()) { ; }
331
332 log_results[i] = lr;
333 log_control[i] = lc;
334
20effc67 335 ++concurrent_log_agm_count;
1e59de90 336
20effc67
TL
337 if((concurrent_log_agm_count % 100U) == 0U)
338 {
339 std::cout << "log agm concurrent at index "
340 << concurrent_log_agm_count
341 << " of "
342 << log_results.size()
343 << ". Total processed so far: "
344 << std::fixed
345 << std::setprecision(1)
346 << (100.0F * float(concurrent_log_agm_count)) / float(log_results.size())
347 << "%."
348 << "\r";
349 }
20effc67 350
1e59de90 351 log_agm_lock.clear();
20effc67
TL
352 }
353 );
354
355 calculation_time = static_cast<float>(std::clock() - start) / static_cast<float>(CLOCKS_PER_SEC);
356
357 std::cout << std::endl;
358
359 std::cout << "Checking results concurrent: ";
360
361 bool result_is_ok = true;
362
1e59de90
TL
363 const FloatingPointType tol = std::numeric_limits<FloatingPointType>::epsilon() * 1000000U;
364
20effc67
TL
365 for(std::size_t i = 0U; i < log_results.size(); ++i)
366 {
367 using std::fabs;
368
369 const FloatingPointType close_fraction = fabs(1 - (log_results[i] / log_control[i]));
370
1e59de90 371 result_is_ok &= (close_fraction < tol);
20effc67
TL
372 }
373
374 std::cout << std::boolalpha << result_is_ok << std::endl;
375
376 return result_is_ok;
377}
378
379template<typename FloatingPointType>
380bool log_agm_sequential(float& calculation_time)
381{
382 const std::size_t count = boost::multiprecision::exercise_threading::detail::primes().size();
383
384 std::vector<FloatingPointType> log_results(count);
385 std::vector<FloatingPointType> log_control(count);
386
20effc67
TL
387 const std::clock_t start = std::clock();
388
389 for(std::size_t i = 0U; i < log_results.size(); ++i)
390 {
391 const std::size_t sequential_log_agm_count = i + 1U;
392
1e59de90
TL
393 const FloatingPointType dx = (FloatingPointType(1U) / (boost::multiprecision::exercise_threading::detail::primes()[i]));
394 const FloatingPointType x = boost::math::constants::catalan<FloatingPointType>() + dx;
395
396 log_results[i] = boost::multiprecision::exercise_threading::log(x);
397 log_control[i] = boost::multiprecision::log(x);
398
20effc67
TL
399 if((sequential_log_agm_count % 100U) == 0U)
400 {
401 std::cout << "log agm sequential at index "
402 << sequential_log_agm_count
403 << " of "
404 << log_results.size()
405 << ". Total processed so far: "
406 << std::fixed
407 << std::setprecision(1)
408 << (100.0F * float(sequential_log_agm_count)) / float(log_results.size())
409 << "%."
410 << "\r";
411 }
20effc67
TL
412 }
413
414 calculation_time = static_cast<float>(std::clock() - start) / static_cast<float>(CLOCKS_PER_SEC);
415
416 std::cout << std::endl;
417
418 std::cout << "Checking results sequential: ";
419
420 bool result_is_ok = true;
421
1e59de90
TL
422 const FloatingPointType tol = std::numeric_limits<FloatingPointType>::epsilon() * 1000000U;
423
20effc67
TL
424 for(std::size_t i = 0U; i < log_results.size(); ++i)
425 {
426 using std::fabs;
427
428 const FloatingPointType close_fraction = fabs(1 - (log_results[i] / log_control[i]));
429
1e59de90 430 result_is_ok &= (close_fraction < tol);
20effc67
TL
431 }
432
433 std::cout << std::boolalpha << result_is_ok << std::endl;
434
435 return result_is_ok;
436}
437
438int main()
439{
440 std::cout << "Calculating "
441 << boost::multiprecision::exercise_threading::detail::primes().size()
442 << " primes"
443 << std::endl;
444
445 float calculation_time_concurrent;
446 const bool result_is_ok_concurrent = log_agm_concurrent<big_float_type>(calculation_time_concurrent);
447
448 float calculation_time_sequential;
449 const bool result_is_ok_sequential = log_agm_sequential<big_float_type>(calculation_time_sequential);
450
451 std::cout << std::endl;
452
453 std::cout << "result_is_ok_concurrent: "
454 << std::boolalpha
455 << result_is_ok_concurrent
456 << ", calculation_time_concurrent: "
457 << std::fixed
458 << std::setprecision(1)
459 << calculation_time_concurrent
460 << "s"
461 << std::endl;
462
463 std::cout << "result_is_ok_sequential: "
464 << std::boolalpha
465 << result_is_ok_sequential
466 << ", calculation_time_sequential: "
467 << std::fixed
468 << std::setprecision(1)
469 << calculation_time_sequential
470 << "s"
471 << std::endl;
472}