ceph/src/boost/boost/math/quadrature/naive_monte_carlo.hpp

   1 /*
   2  * Copyright Nick Thompson, 2018
   3  * Use, modification and distribution are subject to the
   4  * Boost Software License, Version 1.0. (See accompanying file
   5  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   6  */
   7 #ifndef BOOST_MATH_QUADRATURE_NAIVE_MONTE_CARLO_HPP
   8 #define BOOST_MATH_QUADRATURE_NAIVE_MONTE_CARLO_HPP
   9 #include <sstream>
  10 #include <algorithm>
  11 #include <vector>
  12 #include <boost/atomic.hpp>
  13 #include <functional>
  14 #include <future>
  15 #include <thread>
  16 #include <initializer_list>
  17 #include <utility>
  18 #include <random>
  19 #include <chrono>
  20 #include <map>
  21 #include <boost/math/policies/error_handling.hpp>
  22
  23 namespace boost { namespace math { namespace quadrature {
  24
  25 namespace detail {
  26   enum class limit_classification {FINITE,
  27                                    LOWER_BOUND_INFINITE,
  28                                    UPPER_BOUND_INFINITE,
  29                                    DOUBLE_INFINITE};
  30 }
  31
  32 template<class Real, class F, class RandomNumberGenerator = std::mt19937_64, class Policy = boost::math::policies::policy<>>
  33 class naive_monte_carlo
  34 {
  35 public:
  36     naive_monte_carlo(const F& integrand,
  37                       std::vector<std::pair<Real, Real>> const & bounds,
  38                       Real error_goal,
  39                       bool singular = true,
  40                       uint64_t threads = std::thread::hardware_concurrency(),
  41                       uint64_t seed = 0): m_num_threads{threads}, m_seed{seed}
  42     {
  43         using std::numeric_limits;
  44         using std::sqrt;
  45         uint64_t n = bounds.size();
  46         m_lbs.resize(n);
  47         m_dxs.resize(n);
  48         m_limit_types.resize(n);
  49         m_volume = 1;
  50         static const char* function = "boost::math::quadrature::naive_monte_carlo<%1%>";
  51         for (uint64_t i = 0; i < n; ++i)
  52         {
  53             if (bounds[i].second <= bounds[i].first)
  54             {
  55                 boost::math::policies::raise_domain_error(function, "The upper bound is <= the lower bound.\n", bounds[i].second, Policy());
  56                 return;
  57             }
  58             if (bounds[i].first == -numeric_limits<Real>::infinity())
  59             {
  60                 if (bounds[i].second == numeric_limits<Real>::infinity())
  61                 {
  62                     m_limit_types[i] = detail::limit_classification::DOUBLE_INFINITE;
  63                 }
  64                 else
  65                 {
  66                     m_limit_types[i] = detail::limit_classification::LOWER_BOUND_INFINITE;
  67                     // Ok ok this is bad to use the second bound as the lower limit and then reflect.
  68                     m_lbs[i] = bounds[i].second;
  69                     m_dxs[i] = numeric_limits<Real>::quiet_NaN();
  70                 }
  71             }
  72             else if (bounds[i].second == numeric_limits<Real>::infinity())
  73             {
  74                 m_limit_types[i] = detail::limit_classification::UPPER_BOUND_INFINITE;
  75                 if (singular)
  76                 {
  77                     // I've found that it's easier to sample on a closed set and perturb the boundary
  78                     // than to try to sample very close to the boundary.
  79                     m_lbs[i] = std::nextafter(bounds[i].first, (std::numeric_limits<Real>::max)());
  80                 }
  81                 else
  82                 {
  83                     m_lbs[i] = bounds[i].first;
  84                 }
  85                 m_dxs[i] = numeric_limits<Real>::quiet_NaN();
  86             }
  87             else
  88             {
  89                 m_limit_types[i] = detail::limit_classification::FINITE;
  90                 if (singular)
  91                 {
  92                     if (bounds[i].first == 0)
  93                     {
  94                         m_lbs[i] = std::numeric_limits<Real>::epsilon();
  95                     }
  96                     else
  97                     {
  98                         m_lbs[i] = std::nextafter(bounds[i].first, (std::numeric_limits<Real>::max)());
  99                     }
 100
 101                     m_dxs[i] = std::nextafter(bounds[i].second, std::numeric_limits<Real>::lowest()) - m_lbs[i];
 102                 }
 103                 else
 104                 {
 105                     m_lbs[i] = bounds[i].first;
 106                     m_dxs[i] = bounds[i].second - bounds[i].first;
 107                 }
 108                 m_volume *= m_dxs[i];
 109             }
 110         }
 111
 112         m_integrand = [this, &integrand](std::vector<Real> & x)->Real
 113         {
 114             Real coeff = m_volume;
 115             for (uint64_t i = 0; i < x.size(); ++i)
 116             {
 117                 // Variable transformation are listed at:
 118                 // https://en.wikipedia.org/wiki/Numerical_integration
 119                 // However, we've made some changes to these so that we can evaluate on a compact domain.
 120                 if (m_limit_types[i] == detail::limit_classification::FINITE)
 121                 {
 122                     x[i] = m_lbs[i] + x[i]*m_dxs[i];
 123                 }
 124                 else if (m_limit_types[i] == detail::limit_classification::UPPER_BOUND_INFINITE)
 125                 {
 126                     Real t = x[i];
 127                     Real z = 1/(1 + numeric_limits<Real>::epsilon() - t);
 128                     coeff *= (z*z)*(1 + numeric_limits<Real>::epsilon());
 129                     x[i] = m_lbs[i] + t*z;
 130                 }
 131                 else if (m_limit_types[i] == detail::limit_classification::LOWER_BOUND_INFINITE)
 132                 {
 133                     Real t = x[i];
 134                     Real z = 1/(t+sqrt((numeric_limits<Real>::min)()));
 135                     coeff *= (z*z);
 136                     x[i] = m_lbs[i] + (t-1)*z;
 137                 }
 138                 else
 139                 {
 140                     Real t1 = 1/(1+numeric_limits<Real>::epsilon() - x[i]);
 141                     Real t2 = 1/(x[i]+numeric_limits<Real>::epsilon());
 142                     x[i] = (2*x[i]-1)*t1*t2/4;
 143                     coeff *= (t1*t1+t2*t2)/4;
 144                 }
 145             }
 146             return coeff*integrand(x);
 147         };
 148
 149         // If we don't do a single function call in the constructor,
 150         // we can't do a restart.
 151         std::vector<Real> x(m_lbs.size());
 152
 153         // If the seed is zero, that tells us to choose a random seed for the user:
 154         if (seed == 0)
 155         {
 156             std::random_device rd;
 157             seed = rd();
 158         }
 159
 160         RandomNumberGenerator gen(seed);
 161         Real inv_denom = 1/static_cast<Real>(((gen.max)()-(gen.min)()));
 162
 163         m_num_threads = (std::max)(m_num_threads, (uint64_t) 1);
 164         m_thread_calls.reset(new boost::atomic<uint64_t>[threads]);
 165         m_thread_Ss.reset(new boost::atomic<Real>[threads]);
 166         m_thread_averages.reset(new boost::atomic<Real>[threads]);
 167
 168         Real avg = 0;
 169         for (uint64_t i = 0; i < m_num_threads; ++i)
 170         {
 171             for (uint64_t j = 0; j < m_lbs.size(); ++j)
 172             {
 173                 x[j] = (gen()-(gen.min)())*inv_denom;
 174             }
 175             Real y = m_integrand(x);
 176             m_thread_averages[i] = y; // relaxed store
 177             m_thread_calls[i] = 1;
 178             m_thread_Ss[i] = 0;
 179             avg += y;
 180         }
 181         avg /= m_num_threads;
 182         m_avg = avg; // relaxed store
 183
 184         m_error_goal = error_goal; // relaxed store
 185         m_start = std::chrono::system_clock::now();
 186         m_done = false; // relaxed store
 187         m_total_calls = m_num_threads;  // relaxed store
 188         m_variance = (numeric_limits<Real>::max)();
 189     }
 190
 191     std::future<Real> integrate()
 192     {
 193         // Set done to false in case we wish to restart:
 194         m_done.store(false); // relaxed store, no worker threads yet
 195         m_start = std::chrono::system_clock::now();
 196         return std::async(std::launch::async,
 197                           &naive_monte_carlo::m_integrate, this);
 198     }
 199
 200     void cancel()
 201     {
 202         // If seed = 0 (meaning have the routine pick the seed), this leaves the seed the same.
 203         // If seed != 0, then the seed is changed, so a restart doesn't do the exact same thing.
 204         m_seed = m_seed*m_seed;
 205         m_done = true; // relaxed store, worker threads will get the message eventually
 206         // Make sure the error goal is infinite, because otherwise we'll loop when we do the final error goal check:
 207         m_error_goal = (std::numeric_limits<Real>::max)();
 208     }
 209
 210     Real variance() const
 211     {
 212         return m_variance.load();
 213     }
 214
 215     Real current_error_estimate() const
 216     {
 217         using std::sqrt;
 218         //
 219         // There is a bug here: m_variance and m_total_calls get updated asynchronously
 220         // and may be out of synch when we compute the error estimate, not sure if it matters though...
 221         //
 222         return sqrt(m_variance.load()/m_total_calls.load());
 223     }
 224
 225     std::chrono::duration<Real> estimated_time_to_completion() const
 226     {
 227         auto now = std::chrono::system_clock::now();
 228         std::chrono::duration<Real> elapsed_seconds = now - m_start;
 229         Real r = this->current_error_estimate()/m_error_goal.load(); // relaxed load
 230         if (r*r <= 1) {
 231             return 0*elapsed_seconds;
 232         }
 233         return (r*r - 1)*elapsed_seconds;
 234     }
 235
 236     void update_target_error(Real new_target_error)
 237     {
 238         m_error_goal = new_target_error;  // relaxed store
 239     }
 240
 241     Real progress() const
 242     {
 243         Real r = m_error_goal.load()/this->current_error_estimate();  // relaxed load
 244         if (r*r >= 1)
 245         {
 246             return 1;
 247         }
 248         return r*r;
 249     }
 250
 251     Real current_estimate() const
 252     {
 253         return m_avg.load();
 254     }
 255
 256     uint64_t calls() const
 257     {
 258         return m_total_calls.load();  // relaxed load
 259     }
 260
 261 private:
 262
 263    Real m_integrate()
 264    {
 265       uint64_t seed;
 266       // If the user tells us to pick a seed, pick a seed:
 267       if (m_seed == 0)
 268       {
 269          std::random_device rd;
 270          seed = rd();
 271       }
 272       else // use the seed we are given:
 273       {
 274          seed = m_seed;
 275       }
 276       RandomNumberGenerator gen(seed);
 277       int max_repeat_tries = 5;
 278       do{
 279
 280          if (max_repeat_tries < 5)
 281          {
 282             m_done = false;
 283
 284 #ifdef BOOST_NAIVE_MONTE_CARLO_DEBUG_FAILURES
 285             std::cout << "Failed to achieve required tolerance first time through..\n";
 286             std::cout << "  variance =    " << m_variance << std::endl;
 287             std::cout << "  average =     " << m_avg << std::endl;
 288             std::cout << "  total calls = " << m_total_calls << std::endl;
 289
 290             for (std::size_t i = 0; i < m_num_threads; ++i)
 291                std::cout << "  thread_calls[" << i << "] = " << m_thread_calls[i] << std::endl;
 292             for (std::size_t i = 0; i < m_num_threads; ++i)
 293                std::cout << "  thread_averages[" << i << "] = " << m_thread_averages[i] << std::endl;
 294             for (std::size_t i = 0; i < m_num_threads; ++i)
 295                std::cout << "  thread_Ss[" << i << "] = " << m_thread_Ss[i] << std::endl;
 296 #endif
 297          }
 298
 299          std::vector<std::thread> threads(m_num_threads);
 300          for (uint64_t i = 0; i < threads.size(); ++i)
 301          {
 302             threads[i] = std::thread(&naive_monte_carlo::m_thread_monte, this, i, gen());
 303          }
 304          do {
 305             std::this_thread::sleep_for(std::chrono::milliseconds(100));
 306             uint64_t total_calls = 0;
 307             for (uint64_t i = 0; i < m_num_threads; ++i)
 308             {
 309                uint64_t t_calls = m_thread_calls[i].load(boost::memory_order::consume);
 310                total_calls += t_calls;
 311             }
 312             Real variance = 0;
 313             Real avg = 0;
 314             for (uint64_t i = 0; i < m_num_threads; ++i)
 315             {
 316                uint64_t t_calls = m_thread_calls[i].load(boost::memory_order::consume);
 317                // Will this overflow? Not hard to remove . . .
 318                avg += m_thread_averages[i].load(boost::memory_order::relaxed)*((Real)t_calls / (Real)total_calls);
 319                variance += m_thread_Ss[i].load(boost::memory_order::relaxed);
 320             }
 321             m_avg.store(avg, boost::memory_order::release);
 322             m_variance.store(variance / (total_calls - 1), boost::memory_order::release);
 323             m_total_calls = total_calls; // relaxed store, it's just for user feedback
 324             // Allow cancellation:
 325             if (m_done) // relaxed load
 326             {
 327                break;
 328             }
 329          } while (m_total_calls < 2048 || this->current_error_estimate() > m_error_goal.load(boost::memory_order::consume));
 330          // Error bound met; signal the threads:
 331          m_done = true; // relaxed store, threads will get the message in the end
 332          std::for_each(threads.begin(), threads.end(),
 333             std::mem_fn(&std::thread::join));
 334          if (m_exception)
 335          {
 336             std::rethrow_exception(m_exception);
 337          }
 338          // Incorporate their work into the final estimate:
 339          uint64_t total_calls = 0;
 340          for (uint64_t i = 0; i < m_num_threads; ++i)
 341          {
 342             uint64_t t_calls = m_thread_calls[i].load(boost::memory_order::consume);
 343             total_calls += t_calls;
 344          }
 345          Real variance = 0;
 346          Real avg = 0;
 347
 348          for (uint64_t i = 0; i < m_num_threads; ++i)
 349          {
 350             uint64_t t_calls = m_thread_calls[i].load(boost::memory_order::consume);
 351             // Averages weighted by the number of calls the thread made:
 352             avg += m_thread_averages[i].load(boost::memory_order::relaxed)*((Real)t_calls / (Real)total_calls);
 353             variance += m_thread_Ss[i].load(boost::memory_order::relaxed);
 354          }
 355          m_avg.store(avg, boost::memory_order::release);
 356          m_variance.store(variance / (total_calls - 1), boost::memory_order::release);
 357          m_total_calls = total_calls; // relaxed store, this is just user feedback
 358
 359          // Sometimes, the master will observe the variance at a very "good" (or bad?) moment,
 360          // Then the threads proceed to find the variance is much greater by the time they hear the message to stop.
 361          // This *WOULD* make sure that the final error estimate is within the error bounds.
 362       }
 363       while ((--max_repeat_tries >= 0) && (this->current_error_estimate() > m_error_goal));
 364
 365       return m_avg.load(boost::memory_order::consume);
 366     }
 367
 368     void m_thread_monte(uint64_t thread_index, uint64_t seed)
 369     {
 370         using std::numeric_limits;
 371         try
 372         {
 373             std::vector<Real> x(m_lbs.size());
 374             RandomNumberGenerator gen(seed);
 375             Real inv_denom = (Real) 1/(Real)( (gen.max)() - (gen.min)()  );
 376             Real M1 = m_thread_averages[thread_index].load(boost::memory_order::consume);
 377             Real S = m_thread_Ss[thread_index].load(boost::memory_order::consume);
 378             // Kahan summation is required or the value of the integrand will go on a random walk during long computations.
 379             // See the implementation discussion.
 380             // The idea is that the unstabilized additions have error sigma(f)/sqrt(N) + epsilon*N, which diverges faster than it converges!
 381             // Kahan summation turns this to sigma(f)/sqrt(N) + epsilon^2*N, and the random walk occurs on a timescale of 10^14 years (on current hardware)
 382             Real compensator = 0;
 383             uint64_t k = m_thread_calls[thread_index].load(boost::memory_order::consume);
 384             while (!m_done) // relaxed load
 385             {
 386                 int j = 0;
 387                 // If we don't have a certain number of calls before an update, we can easily terminate prematurely
 388                 // because the variance estimate is way too low. This magic number is a reasonable compromise, as 1/sqrt(2048) = 0.02,
 389                 // so it should recover 2 digits if the integrand isn't poorly behaved, and if it is, it should discover that before premature termination.
 390                 // Of course if the user has 64 threads, then this number is probably excessive.
 391                 int magic_calls_before_update = 2048;
 392                 while (j++ < magic_calls_before_update)
 393                 {
 394                     for (uint64_t i = 0; i < m_lbs.size(); ++i)
 395                     {
 396                         x[i] = (gen() - (gen.min)())*inv_denom;
 397                     }
 398                     Real f = m_integrand(x);
 399                     using std::isfinite;
 400                     if (!isfinite(f))
 401                     {
 402                         // The call to m_integrand transform x, so this error message states the correct node.
 403                         std::stringstream os;
 404                         os << "Your integrand was evaluated at {";
 405                         for (uint64_t i = 0; i < x.size() -1; ++i)
 406                         {
 407                              os << x[i] << ", ";
 408                         }
 409                         os << x[x.size() -1] << "}, and returned " << f << std::endl;
 410                         static const char* function = "boost::math::quadrature::naive_monte_carlo<%1%>";
 411                         boost::math::policies::raise_domain_error(function, os.str().c_str(), /*this is a dummy arg to make it compile*/ 7.2, Policy());
 412                     }
 413                     ++k;
 414                     Real term = (f - M1)/k;
 415                     Real y1 = term - compensator;
 416                     Real M2 = M1 + y1;
 417                     compensator = (M2 - M1) - y1;
 418                     S += (f - M1)*(f - M2);
 419                     M1 = M2;
 420                 }
 421                 m_thread_averages[thread_index].store(M1, boost::memory_order::release);
 422                 m_thread_Ss[thread_index].store(S, boost::memory_order::release);
 423                 m_thread_calls[thread_index].store(k, boost::memory_order::release);
 424             }
 425         }
 426         catch (...)
 427         {
 428             // Signal the other threads that the computation is ruined:
 429             m_done = true; // relaxed store
 430             m_exception = std::current_exception();
 431         }
 432     }
 433
 434     std::function<Real(std::vector<Real> &)> m_integrand;
 435     uint64_t m_num_threads;
 436     uint64_t m_seed;
 437     boost::atomic<Real> m_error_goal;
 438     boost::atomic<bool> m_done;
 439     std::vector<Real> m_lbs;
 440     std::vector<Real> m_dxs;
 441     std::vector<detail::limit_classification> m_limit_types;
 442     Real m_volume;
 443     boost::atomic<uint64_t> m_total_calls;
 444     // I wanted these to be vectors rather than maps,
 445     // but you can't resize a vector of atomics.
 446     std::unique_ptr<boost::atomic<uint64_t>[]> m_thread_calls;
 447     boost::atomic<Real> m_variance;
 448     std::unique_ptr<boost::atomic<Real>[]> m_thread_Ss;
 449     boost::atomic<Real> m_avg;
 450     std::unique_ptr<boost::atomic<Real>[]> m_thread_averages;
 451     std::chrono::time_point<std::chrono::system_clock> m_start;
 452     std::exception_ptr m_exception;
 453 };
 454
 455 }}}
 456 #endif