[ceph.git] / ceph / src / boost / libs / multiprecision / example / exercise_threading_log_agm.cpp

///////////////////////////////////////////////////////////////////////////////
//      Copyright Christopher Kormanyos 2020 - 2021.
// Distributed under the Boost Software License, Version 1.0.
//    (See accompanying file LICENSE_1_0.txt or copy at
//          http://www.boost.org/LICENSE_1_0.txt)
//

// This example exercises Boost.Multiprecision in concurrent
// multi-threaded environments. To do so a loop involving
// non-trivial calculations of numerous function values
// has been set up within both concurrent as well as
// sequential running environments. In particular,
// this example uses an AGM method to do a "from the ground up"
// calculation of logarithms. The logarithm functions values
// are compared with the values from Boost.Multiprecision's
// specific log functions for the relevant backends.
// The log GM here is not optimized or intended for
// high-performance work, but can be taken as an
// interesting example of an AGM iteration if helpful.

// This example has been initially motivated in part
// by discussions in:
// https://github.com/boostorg/multiprecision/pull/211

// We find the following performance data here:
// https://github.com/boostorg/multiprecision/pull/213
//
// cpp_dec_float:
// result_is_ok_concurrent: true, calculation_time_concurrent: 18.1s
// result_is_ok_sequential: true, calculation_time_sequential: 48.5s
//
// cpp_bin_float:
// result_is_ok_concurrent: true, calculation_time_concurrent: 18.7s
// result_is_ok_sequential: true, calculation_time_sequential: 50.4s
//
// gmp_float:
// result_is_ok_concurrent: true, calculation_time_concurrent: 3.3s
// result_is_ok_sequential: true, calculation_time_sequential: 12.4s
//
// mpfr_float:
// result_is_ok_concurrent: true, calculation_time_concurrent: 0.6s
// result_is_ok_sequential: true, calculation_time_sequential: 1.9s

#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <limits>
#include <thread>
#include <vector>

#include <boost/math/constants/constants.hpp>
#include <boost/math/special_functions/prime.hpp>

#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT       101
#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT           102
#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT       103
#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT          104

#if !defined(BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE)
#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT
//#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT
//#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT
//#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT
#endif

#if  (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT)
#include <boost/multiprecision/cpp_dec_float.hpp>

using big_float_type = boost::multiprecision::number<boost::multiprecision::cpp_dec_float<501>,
                                                     boost::multiprecision::et_off>;

#elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT)
#include <boost/multiprecision/cpp_bin_float.hpp>

using big_float_type = boost::multiprecision::number<boost::multiprecision::cpp_bin_float<501>,
                                                     boost::multiprecision::et_off>;

#elif  (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT)
#include <boost/multiprecision/gmp.hpp>

using big_float_type = boost::multiprecision::number<boost::multiprecision::gmp_float<501>,
                                                     boost::multiprecision::et_off>;

#elif  (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT)
#include <boost/multiprecision/mpfr.hpp>

using big_float_type = boost::multiprecision::number<boost::multiprecision::mpfr_float_backend<501>,
                                                     boost::multiprecision::et_off>;

#else
#error BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE is undefined.
#endif

namespace boost { namespace multiprecision { namespace exercise_threading {

namespace detail {

namespace my_concurrency {
template<typename index_type,
         typename callable_function_type>
void parallel_for(index_type             start,
                  index_type             end,
                  callable_function_type parallel_function)
{
  // Estimate the number of threads available.
  static const unsigned int number_of_threads_hint =
    std::thread::hardware_concurrency();

  static const unsigned int number_of_threads_total =
    ((number_of_threads_hint == 0U) ? 4U : number_of_threads_hint);

  // Use only 3/4 of the available cores.
  static const unsigned int number_of_threads = number_of_threads_total - (number_of_threads_total / 8U);

  std::cout << "Executing with " << number_of_threads << " threads" << std::endl;

  // Set the size of a slice for the range functions.
  index_type n = index_type(end - start) + index_type(1);

  index_type slice =
    static_cast<index_type>(std::round(n / static_cast<float>(number_of_threads)));

  slice = (std::max)(slice, index_type(1));

  // Inner loop.
  auto launch_range =
    [&parallel_function](index_type index_lo, index_type index_hi)
    {
      for(index_type i = index_lo; i < index_hi; ++i)
      {
        parallel_function(i);
      }
    };

  // Create the thread pool and launch the jobs.
  std::vector<std::thread> pool;

  pool.reserve(number_of_threads);

  index_type i1 = start;
  index_type i2 = (std::min)(index_type(start + slice), end);

  for(index_type i = 0U; ((index_type(i + index_type(1U)) < number_of_threads) && (i1 < end)); ++i)
  {
    pool.emplace_back(launch_range, i1, i2);

    i1 = i2;

    i2 = (std::min)(index_type(i2 + slice), end);
  }

  if(i1 < end)
  {
    pool.emplace_back(launch_range, i1, end);
  }

  // Wait for the jobs to finish.
  for(std::thread& thread_in_pool : pool)
  {
    if(thread_in_pool.joinable())
    {
      thread_in_pool.join();
    }
  }
}
} // namespace my_concurrency

template<typename FloatingPointType,
         typename UnsignedIntegralType>
FloatingPointType pown(const FloatingPointType& b, const UnsignedIntegralType& p)
{
  // Calculate (b ^ p).

  using local_floating_point_type    = FloatingPointType;
  using local_unsigned_integral_type = UnsignedIntegralType;

  local_floating_point_type result;

  if     (p == local_unsigned_integral_type(0U)) { result = local_floating_point_type(1U); }
  else if(p == local_unsigned_integral_type(1U)) { result = b; }
  else if(p == local_unsigned_integral_type(2U)) { result = b; result *= b; }
  else
  {
    result = local_floating_point_type(1U);

    local_floating_point_type y(b);

    for(local_unsigned_integral_type p_local(p); p_local != local_unsigned_integral_type(0U); p_local >>= 1U)
    {
      if((static_cast<unsigned>(p_local) & 1U) != 0U)
      {
        result *= y;
      }

      y *= y;
    }
  }

  return result;
}

const std::vector<std::uint32_t>& primes()
{
  static std::vector<std::uint32_t> my_primes;

  if(my_primes.empty())
  {
    my_primes.resize(10000U);

    // Get exactly 10,000 primes.
    for(std::size_t i = 0U; i < my_primes.size(); ++i)
    {
      my_primes[i] = boost::math::prime((unsigned int) i);
    }
  }

  return my_primes;
}

} // namespace detail

template<typename FloatingPointType>
FloatingPointType log(const FloatingPointType& x)
{
  // Use an AGM method to compute the logarithm of x.

  // For values less than 1 invert the argument and
  // remember (in this case) to negate the result below.
  const bool b_negate = (x < 1);

  const FloatingPointType xx = ((b_negate == false) ? x : 1 / x);

  // Set a0 = 1
  // Set b0 = 4 / (x * 2^m) = 1 / (x * 2^(m - 2))

  FloatingPointType ak(1U);

  const float n_times_factor = static_cast<float>(static_cast<float>(std::numeric_limits<FloatingPointType>::digits10) * 1.67F);
  const float lgx_over_lg2   = std::log(static_cast<float>(xx)) / std::log(2.0F);

  std::int32_t m = static_cast<std::int32_t>(n_times_factor - lgx_over_lg2);

  // Ensure that the resulting power is non-negative.
  // Also enforce that m >= 8.
  m = (std::max)(m, static_cast<std::int32_t>(8));

  FloatingPointType bk = detail::pown(FloatingPointType(2), static_cast<std::uint32_t>(m));

  bk *= xx;
  bk  = 4 / bk;

  FloatingPointType ak_tmp(0U);

  using std::sqrt;

  // Determine the requested precision of the upcoming iteration in units of digits10.
  const FloatingPointType target_tolerance = sqrt(std::numeric_limits<FloatingPointType>::epsilon()) / 100;

  for(std::int32_t k = static_cast<std::int32_t>(0); k < static_cast<std::int32_t>(64); ++k)
  {
    using std::fabs;

    // Check for the number of significant digits to be
    // at least half of the requested digits. If at least
    // half of the requested digits have been achieved,
    // then break after the upcoming iteration.
    const bool break_after_this_iteration = (   (k > static_cast<std::int32_t>(4))
                                             && (fabs(1 - fabs(ak / bk)) < target_tolerance));

    ak_tmp  = ak;
    ak     += bk;
    ak     /= 2;

    if(break_after_this_iteration)
    {
      break;
    }

    bk *= ak_tmp;
    bk  = sqrt(bk);
  }

  // We are now finished with the AGM iteration for log(x).

  // Compute log(x) = {pi / [2 * AGM(1, 4 / 2^m)]} - (m * ln2)
  // Note at this time that (ak = bk) = AGM(...)

  // Retrieve the value of pi, divide by (2 * a) and subtract (m * ln2).
  const FloatingPointType result =
       boost::math::constants::pi<FloatingPointType>() / (ak * 2)
    - (boost::math::constants::ln_two<FloatingPointType>() * m);

  return ((b_negate == true) ? -result : result);
}

} } } // namespace boost::multiprecision::exercise_threading

template<typename FloatingPointType>
bool log_agm_concurrent(float& calculation_time)
{
  const std::size_t count = boost::multiprecision::exercise_threading::detail::primes().size();

  std::vector<FloatingPointType> log_results(count);
  std::vector<FloatingPointType> log_control(count);

  std::atomic_flag log_agm_lock = ATOMIC_FLAG_INIT;

  std::size_t concurrent_log_agm_count = 0U;

  const std::clock_t start = std::clock();

  boost::multiprecision::exercise_threading::detail::my_concurrency::parallel_for
  (
    std::size_t(0U),
    log_results.size(),
    [&log_results, &log_control, &concurrent_log_agm_count, &log_agm_lock](std::size_t i)
    {
      while(log_agm_lock.test_and_set()) { ; }
      const FloatingPointType dx = (FloatingPointType(1U) / (boost::multiprecision::exercise_threading::detail::primes()[i]));
      log_agm_lock.clear();

      const FloatingPointType  x = boost::math::constants::catalan<FloatingPointType>() + dx;

      const FloatingPointType lr = boost::multiprecision::exercise_threading::log(x);
      const FloatingPointType lc = boost::multiprecision::log(x);

      while(log_agm_lock.test_and_set()) { ; }

      log_results[i] = lr;
      log_control[i] = lc;

      ++concurrent_log_agm_count;

      if((concurrent_log_agm_count % 100U) == 0U)
      {
        std::cout << "log agm concurrent at index "
                  << concurrent_log_agm_count
                  << " of "
                  << log_results.size()
                  << ". Total processed so far: "
                  << std::fixed
                  << std::setprecision(1)
                  << (100.0F * float(concurrent_log_agm_count)) / float(log_results.size())
                  << "%."
                  << "\r";
      }

      log_agm_lock.clear();
    }
  );

  calculation_time = static_cast<float>(std::clock() - start) / static_cast<float>(CLOCKS_PER_SEC);

  std::cout << std::endl;

  std::cout << "Checking results concurrent: ";

  bool result_is_ok = true;

  const FloatingPointType tol = std::numeric_limits<FloatingPointType>::epsilon() * 1000000U;

  for(std::size_t i = 0U; i < log_results.size(); ++i)
  {
    using std::fabs;

    const FloatingPointType close_fraction = fabs(1 - (log_results[i] / log_control[i]));

    result_is_ok &= (close_fraction < tol);
  }

  std::cout << std::boolalpha << result_is_ok << std::endl;

  return result_is_ok;
}

template<typename FloatingPointType>
bool log_agm_sequential(float& calculation_time)
{
  const std::size_t count = boost::multiprecision::exercise_threading::detail::primes().size();

  std::vector<FloatingPointType> log_results(count);
  std::vector<FloatingPointType> log_control(count);

  const std::clock_t start = std::clock();

  for(std::size_t i = 0U; i < log_results.size(); ++i)
  {
    const std::size_t sequential_log_agm_count = i + 1U;

    const FloatingPointType dx = (FloatingPointType(1U) / (boost::multiprecision::exercise_threading::detail::primes()[i]));
    const FloatingPointType  x = boost::math::constants::catalan<FloatingPointType>() + dx;

    log_results[i] = boost::multiprecision::exercise_threading::log(x);
    log_control[i] = boost::multiprecision::log(x);

    if((sequential_log_agm_count % 100U) == 0U)
    {
      std::cout << "log agm sequential at index "
                << sequential_log_agm_count
                << " of "
                << log_results.size()
                << ". Total processed so far: "
                << std::fixed
                << std::setprecision(1)
                << (100.0F * float(sequential_log_agm_count)) / float(log_results.size())
                << "%."
                << "\r";
    }
  }

  calculation_time = static_cast<float>(std::clock() - start) / static_cast<float>(CLOCKS_PER_SEC);

  std::cout << std::endl;

  std::cout << "Checking results sequential: ";

  bool result_is_ok = true;

  const FloatingPointType tol = std::numeric_limits<FloatingPointType>::epsilon() * 1000000U;

  for(std::size_t i = 0U; i < log_results.size(); ++i)
  {
    using std::fabs;

    const FloatingPointType close_fraction = fabs(1 - (log_results[i] / log_control[i]));

    result_is_ok &= (close_fraction < tol);
  }

  std::cout << std::boolalpha << result_is_ok << std::endl;

  return result_is_ok;
}

int main()
{
  std::cout << "Calculating "
            << boost::multiprecision::exercise_threading::detail::primes().size()
            << " primes"
            << std::endl;

  float calculation_time_concurrent;
  const bool result_is_ok_concurrent = log_agm_concurrent<big_float_type>(calculation_time_concurrent);

  float calculation_time_sequential;
  const bool result_is_ok_sequential = log_agm_sequential<big_float_type>(calculation_time_sequential);

  std::cout << std::endl;

  std::cout << "result_is_ok_concurrent: "
            << std::boolalpha
            << result_is_ok_concurrent
            << ", calculation_time_concurrent: "
            << std::fixed
            << std::setprecision(1)
            << calculation_time_concurrent
            << "s"
            << std::endl;

  std::cout << "result_is_ok_sequential: "
            << std::boolalpha
            << result_is_ok_sequential
            << ", calculation_time_sequential: "
            << std::fixed
            << std::setprecision(1)
            << calculation_time_sequential
            << "s"
            << std::endl;
}
Commit	Line	Data
20effc67	1	///////////////////////////////////////////////////////////////////////////////
1e59de90	2	// Copyright Christopher Kormanyos 2020 - 2021.
20effc67 TL	3	// Distributed under the Boost Software License, Version 1.0.
	4	// (See accompanying file LICENSE_1_0.txt or copy at
	5	// http://www.boost.org/LICENSE_1_0.txt)
	6	//
	7
	8	// This example exercises Boost.Multiprecision in concurrent
	9	// multi-threaded environments. To do so a loop involving
	10	// non-trivial calculations of numerous function values
	11	// has been set up within both concurrent as well as
	12	// sequential running environments. In particular,
	13	// this example uses an AGM method to do a "from the ground up"
	14	// calculation of logarithms. The logarithm functions values
	15	// are compared with the values from Boost.Multiprecision's
	16	// specific log functions for the relevant backends.
	17	// The log GM here is not optimized or intended for
	18	// high-performance work, but can be taken as an
	19	// interesting example of an AGM iteration if helpful.
	20
	21	// This example has been initially motivated in part
	22	// by discussions in:
	23	// https://github.com/boostorg/multiprecision/pull/211
	24
	25	// We find the following performance data here:
	26	// https://github.com/boostorg/multiprecision/pull/213
	27	//
	28	// cpp_dec_float:
	29	// result_is_ok_concurrent: true, calculation_time_concurrent: 18.1s
	30	// result_is_ok_sequential: true, calculation_time_sequential: 48.5s
	31	//
	32	// cpp_bin_float:
	33	// result_is_ok_concurrent: true, calculation_time_concurrent: 18.7s
	34	// result_is_ok_sequential: true, calculation_time_sequential: 50.4s
	35	//
	36	// gmp_float:
	37	// result_is_ok_concurrent: true, calculation_time_concurrent: 3.3s
	38	// result_is_ok_sequential: true, calculation_time_sequential: 12.4s
	39	//
	40	// mpfr_float:
	41	// result_is_ok_concurrent: true, calculation_time_concurrent: 0.6s
	42	// result_is_ok_sequential: true, calculation_time_sequential: 1.9s
	43
	44	#include <array>
	45	#include <atomic>
	46	#include <cstddef>
	47	#include <cstdint>
	48	#include <iomanip>
	49	#include <iostream>
	50	#include <limits>
	51	#include <thread>
	52	#include <vector>
	53
	54	#include <boost/math/constants/constants.hpp>
	55	#include <boost/math/special_functions/prime.hpp>
	56
	57	#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT 101
	58	#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT 102
	59	#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT 103
	60	#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT 104
	61
	62	#if !defined(BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE)
	63	#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT
	64	//#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT
	65	//#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT
	66	//#define BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT
67	#endif
68
69	#if (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_DEC_FLOAT)
70	#include <boost/multiprecision/cpp_dec_float.hpp>
71
72	using big_float_type = boost::multiprecision::number<boost::multiprecision::cpp_dec_float<501>,
73	boost::multiprecision::et_off>;
74
75	#elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_CPP_BIN_FLOAT)
76	#include <boost/multiprecision/cpp_bin_float.hpp>
77
78	using big_float_type = boost::multiprecision::number<boost::multiprecision::cpp_bin_float<501>,
79	boost::multiprecision::et_off>;
80
81	#elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_GMP_FLOAT)
82	#include <boost/multiprecision/gmp.hpp>
83
84	using big_float_type = boost::multiprecision::number<boost::multiprecision::gmp_float<501>,
85	boost::multiprecision::et_off>;
86
87	#elif (BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE == BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_MPFR_FLOAT)
88	#include <boost/multiprecision/mpfr.hpp>
89
90	using big_float_type = boost::multiprecision::number<boost::multiprecision::mpfr_float_backend<501>,
91	boost::multiprecision::et_off>;
92
93	#else
94	#error BOOST_MULTIPRECISION_EXERCISE_THREADING_BACKEND_TYPE is undefined.
95	#endif
96
97	namespace boost { namespace multiprecision { namespace exercise_threading {
98
99	namespace detail {
100
101	namespace my_concurrency {
102	template<typename index_type,
103	typename callable_function_type>
104	void parallel_for(index_type start,
105	index_type end,
106	callable_function_type parallel_function)
107	{
108	// Estimate the number of threads available.
109	static const unsigned int number_of_threads_hint =
110	std::thread::hardware_concurrency();
111
112	static const unsigned int number_of_threads_total =
113	((number_of_threads_hint == 0U) ? 4U : number_of_threads_hint);
114
115	// Use only 3/4 of the available cores.
116	static const unsigned int number_of_threads = number_of_threads_total - (number_of_threads_total / 8U);
117
118	std::cout << "Executing with " << number_of_threads << " threads" << std::endl;
119
120	// Set the size of a slice for the range functions.
121	index_type n = index_type(end - start) + index_type(1);
122
123	index_type slice =
124	static_cast<index_type>(std::round(n / static_cast<float>(number_of_threads)));
125
126	slice = (std::max)(slice, index_type(1));
127
128	// Inner loop.
129	auto launch_range =
130	[&parallel_function](index_type index_lo, index_type index_hi)
131	{
132	for(index_type i = index_lo; i < index_hi; ++i)
133	{
134	parallel_function(i);
135	}
136	};
137
138	// Create the thread pool and launch the jobs.
139	std::vector<std::thread> pool;
140
141	pool.reserve(number_of_threads);
142
143	index_type i1 = start;
144	index_type i2 = (std::min)(index_type(start + slice), end);
145
146	for(index_type i = 0U; ((index_type(i + index_type(1U)) < number_of_threads) && (i1 < end)); ++i)
147	{
148	pool.emplace_back(launch_range, i1, i2);
149
150	i1 = i2;
151
152	i2 = (std::min)(index_type(i2 + slice), end);
153	}
154
155	if(i1 < end)
156	{
157	pool.emplace_back(launch_range, i1, end);
158	}
159
160	// Wait for the jobs to finish.
161	for(std::thread& thread_in_pool : pool)
162	{
163	if(thread_in_pool.joinable())
164	{
165	thread_in_pool.join();
166	}
167	}
168	}
169	} // namespace my_concurrency
170
171	template<typename FloatingPointType,
172	typename UnsignedIntegralType>
173	FloatingPointType pown(const FloatingPointType& b, const UnsignedIntegralType& p)
174	{
175	// Calculate (b ^ p).
176
177	using local_floating_point_type = FloatingPointType;
178	using local_unsigned_integral_type = UnsignedIntegralType;
179
180	local_floating_point_type result;
181
182	if (p == local_unsigned_integral_type(0U)) { result = local_floating_point_type(1U); }
183	else if(p == local_unsigned_integral_type(1U)) { result = b; }
184	else if(p == local_unsigned_integral_type(2U)) { result = b; result *= b; }
185	else
186	{
187	result = local_floating_point_type(1U);
188
189	local_floating_point_type y(b);
190
191	for(local_unsigned_integral_type p_local(p); p_local != local_unsigned_integral_type(0U); p_local >>= 1U)
192	{
193	if((static_cast<unsigned>(p_local) & 1U) != 0U)
194	{
195	result *= y;
196	}
197
198	y *= y;
199	}
200	}
201
202	return result;
203	}
204
205	const std::vector<std::uint32_t>& primes()
206	{
1e59de90 TL	207	static std::vector<std::uint32_t> my_primes;
	208
	209	if(my_primes.empty())
20effc67	210	{
1e59de90	211	my_primes.resize(10000U);
20effc67 TL	212
20effc67 TL	213	// Get exactly 10,000 primes.
1e59de90	214	for(std::size_t i = 0U; i < my_primes.size(); ++i)
20effc67	215	{
1e59de90	216	my_primes[i] = boost::math::prime((unsigned int) i);
20effc67	217	}
1e59de90	218	}
20effc67 TL	219
	220	return my_primes;
	221	}
	222
	223	} // namespace detail
	224
	225	template<typename FloatingPointType>
	226	FloatingPointType log(const FloatingPointType& x)
	227	{
	228	// Use an AGM method to compute the logarithm of x.
	229
	230	// For values less than 1 invert the argument and
	231	// remember (in this case) to negate the result below.
	232	const bool b_negate = (x < 1);
	233
	234	const FloatingPointType xx = ((b_negate == false) ? x : 1 / x);
	235
	236	// Set a0 = 1
	237	// Set b0 = 4 / (x * 2^m) = 1 / (x * 2^(m - 2))
	238
	239	FloatingPointType ak(1U);
	240
	241	const float n_times_factor = static_cast<float>(static_cast<float>(std::numeric_limits<FloatingPointType>::digits10) * 1.67F);
	242	const float lgx_over_lg2 = std::log(static_cast<float>(xx)) / std::log(2.0F);
	243
	244	std::int32_t m = static_cast<std::int32_t>(n_times_factor - lgx_over_lg2);
	245
	246	// Ensure that the resulting power is non-negative.
	247	// Also enforce that m >= 8.
	248	m = (std::max)(m, static_cast<std::int32_t>(8));
	249
	250	FloatingPointType bk = detail::pown(FloatingPointType(2), static_cast<std::uint32_t>(m));
	251
	252	bk *= xx;
	253	bk = 4 / bk;
	254
	255	FloatingPointType ak_tmp(0U);
	256
	257	using std::sqrt;
	258
	259	// Determine the requested precision of the upcoming iteration in units of digits10.
	260	const FloatingPointType target_tolerance = sqrt(std::numeric_limits<FloatingPointType>::epsilon()) / 100;
	261
	262	for(std::int32_t k = static_cast<std::int32_t>(0); k < static_cast<std::int32_t>(64); ++k)
	263	{
	264	using std::fabs;
	265
	266	// Check for the number of significant digits to be
	267	// at least half of the requested digits. If at least
	268	// half of the requested digits have been achieved,
	269	// then break after the upcoming iteration.
	270	const bool break_after_this_iteration = ( (k > static_cast<std::int32_t>(4))
	271	&& (fabs(1 - fabs(ak / bk)) < target_tolerance));
	272
	273	ak_tmp = ak;
	274	ak += bk;
	275	ak /= 2;
	276
	277	if(break_after_this_iteration)
	278	{
	279	break;
	280	}
	281
	282	bk *= ak_tmp;
283	bk = sqrt(bk);
284	}
285
286	// We are now finished with the AGM iteration for log(x).
287
288	// Compute log(x) = {pi / [2 * AGM(1, 4 / 2^m)]} - (m * ln2)
289	// Note at this time that (ak = bk) = AGM(...)
290
291	// Retrieve the value of pi, divide by (2 * a) and subtract (m * ln2).
292	const FloatingPointType result =
293	boost::math::constants::pi<FloatingPointType>() / (ak * 2)
294	- (boost::math::constants::ln_two<FloatingPointType>() * m);
295
296	return ((b_negate == true) ? -result : result);
297	}
298
299	} } } // namespace boost::multiprecision::exercise_threading
300
301	template<typename FloatingPointType>
302	bool log_agm_concurrent(float& calculation_time)
303	{
304	const std::size_t count = boost::multiprecision::exercise_threading::detail::primes().size();
305
306	std::vector<FloatingPointType> log_results(count);
307	std::vector<FloatingPointType> log_control(count);
308
309	std::atomic_flag log_agm_lock = ATOMIC_FLAG_INIT;
310
311	std::size_t concurrent_log_agm_count = 0U;
312
313	const std::clock_t start = std::clock();
314
315	boost::multiprecision::exercise_threading::detail::my_concurrency::parallel_for
316	(
317	std::size_t(0U),
318	log_results.size(),
319	[&log_results, &log_control, &concurrent_log_agm_count, &log_agm_lock](std::size_t i)
320	{
321	while(log_agm_lock.test_and_set()) { ; }
1e59de90 TL	322	const FloatingPointType dx = (FloatingPointType(1U) / (boost::multiprecision::exercise_threading::detail::primes()[i]));
	323	log_agm_lock.clear();
	324
	325	const FloatingPointType x = boost::math::constants::catalan<FloatingPointType>() + dx;
	326
	327	const FloatingPointType lr = boost::multiprecision::exercise_threading::log(x);
	328	const FloatingPointType lc = boost::multiprecision::log(x);
	329
	330	while(log_agm_lock.test_and_set()) { ; }
	331
	332	log_results[i] = lr;
	333	log_control[i] = lc;
	334
20effc67	335	++concurrent_log_agm_count;
1e59de90	336
20effc67 TL	337	if((concurrent_log_agm_count % 100U) == 0U)
	338	{
	339	std::cout << "log agm concurrent at index "
	340	<< concurrent_log_agm_count
	341	<< " of "
	342	<< log_results.size()
	343	<< ". Total processed so far: "
	344	<< std::fixed
	345	<< std::setprecision(1)
	346	<< (100.0F * float(concurrent_log_agm_count)) / float(log_results.size())
	347	<< "%."
	348	<< "\r";
	349	}
20effc67	350
1e59de90	351	log_agm_lock.clear();
20effc67 TL	352	}
	353	);
	354
	355	calculation_time = static_cast<float>(std::clock() - start) / static_cast<float>(CLOCKS_PER_SEC);
	356
	357	std::cout << std::endl;
	358
	359	std::cout << "Checking results concurrent: ";
	360
	361	bool result_is_ok = true;
	362
1e59de90 TL	363	const FloatingPointType tol = std::numeric_limits<FloatingPointType>::epsilon() * 1000000U;
1e59de90 TL	364
20effc67 TL	365	for(std::size_t i = 0U; i < log_results.size(); ++i)
	366	{
	367	using std::fabs;
	368
	369	const FloatingPointType close_fraction = fabs(1 - (log_results[i] / log_control[i]));
	370
1e59de90	371	result_is_ok &= (close_fraction < tol);
20effc67 TL	372	}
	373
	374	std::cout << std::boolalpha << result_is_ok << std::endl;
	375
	376	return result_is_ok;
	377	}
	378
	379	template<typename FloatingPointType>
	380	bool log_agm_sequential(float& calculation_time)
	381	{
	382	const std::size_t count = boost::multiprecision::exercise_threading::detail::primes().size();
	383
	384	std::vector<FloatingPointType> log_results(count);
	385	std::vector<FloatingPointType> log_control(count);
	386
20effc67 TL	387	const std::clock_t start = std::clock();
	388
	389	for(std::size_t i = 0U; i < log_results.size(); ++i)
	390	{
	391	const std::size_t sequential_log_agm_count = i + 1U;
	392
1e59de90 TL	393	const FloatingPointType dx = (FloatingPointType(1U) / (boost::multiprecision::exercise_threading::detail::primes()[i]));
	394	const FloatingPointType x = boost::math::constants::catalan<FloatingPointType>() + dx;
	395
	396	log_results[i] = boost::multiprecision::exercise_threading::log(x);
	397	log_control[i] = boost::multiprecision::log(x);
	398
20effc67 TL	399	if((sequential_log_agm_count % 100U) == 0U)
	400	{
	401	std::cout << "log agm sequential at index "
	402	<< sequential_log_agm_count
	403	<< " of "
	404	<< log_results.size()
	405	<< ". Total processed so far: "
	406	<< std::fixed
	407	<< std::setprecision(1)
	408	<< (100.0F * float(sequential_log_agm_count)) / float(log_results.size())
	409	<< "%."
	410	<< "\r";
	411	}
20effc67 TL	412	}
	413
	414	calculation_time = static_cast<float>(std::clock() - start) / static_cast<float>(CLOCKS_PER_SEC);
	415
	416	std::cout << std::endl;
	417
	418	std::cout << "Checking results sequential: ";
	419
	420	bool result_is_ok = true;
	421
1e59de90 TL	422	const FloatingPointType tol = std::numeric_limits<FloatingPointType>::epsilon() * 1000000U;
1e59de90 TL	423
20effc67 TL	424	for(std::size_t i = 0U; i < log_results.size(); ++i)
	425	{
	426	using std::fabs;
	427
	428	const FloatingPointType close_fraction = fabs(1 - (log_results[i] / log_control[i]));
	429
1e59de90	430	result_is_ok &= (close_fraction < tol);
20effc67 TL	431	}
	432
	433	std::cout << std::boolalpha << result_is_ok << std::endl;
	434
	435	return result_is_ok;
	436	}
	437
	438	int main()
	439	{
	440	std::cout << "Calculating "
	441	<< boost::multiprecision::exercise_threading::detail::primes().size()
	442	<< " primes"
	443	<< std::endl;
	444
	445	float calculation_time_concurrent;
	446	const bool result_is_ok_concurrent = log_agm_concurrent<big_float_type>(calculation_time_concurrent);
	447
	448	float calculation_time_sequential;
	449	const bool result_is_ok_sequential = log_agm_sequential<big_float_type>(calculation_time_sequential);
	450
	451	std::cout << std::endl;
	452
	453	std::cout << "result_is_ok_concurrent: "
	454	<< std::boolalpha
	455	<< result_is_ok_concurrent
	456	<< ", calculation_time_concurrent: "
	457	<< std::fixed
	458	<< std::setprecision(1)
	459	<< calculation_time_concurrent
	460	<< "s"
	461	<< std::endl;
	462
	463	std::cout << "result_is_ok_sequential: "
	464	<< std::boolalpha
	465	<< result_is_ok_sequential
	466	<< ", calculation_time_sequential: "
	467	<< std::fixed
	468	<< std::setprecision(1)
	469	<< calculation_time_sequential
	470	<< "s"
	471	<< std::endl;
	472	}