]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/math/quadrature/naive_monte_carlo.hpp
import quincy beta 17.1.0
[ceph.git] / ceph / src / boost / boost / math / quadrature / naive_monte_carlo.hpp
1 /*
2 * Copyright Nick Thompson, 2018
3 * Use, modification and distribution are subject to the
4 * Boost Software License, Version 1.0. (See accompanying file
5 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 */
7 #ifndef BOOST_MATH_QUADRATURE_NAIVE_MONTE_CARLO_HPP
8 #define BOOST_MATH_QUADRATURE_NAIVE_MONTE_CARLO_HPP
9 #include <sstream>
10 #include <algorithm>
11 #include <vector>
12 #include <boost/atomic.hpp>
13 #include <functional>
14 #include <future>
15 #include <thread>
16 #include <initializer_list>
17 #include <utility>
18 #include <random>
19 #include <chrono>
20 #include <map>
21 #include <boost/math/policies/error_handling.hpp>
22
23 namespace boost { namespace math { namespace quadrature {
24
namespace detail {
   // Classifies an integration interval [a, b] by which (if either) of its
   // endpoints is infinite.  The classification selects which change of
   // variables maps the interval onto the compact unit interval that the
   // sampler actually draws from.
   //
   // NOTE: the enumerator order fixes the underlying values (0..3) — do not
   // reorder.
   enum class limit_classification {FINITE,
                                    LOWER_BOUND_INFINITE,
                                    UPPER_BOUND_INFINITE,
                                    DOUBLE_INFINITE};
}
31
32 template<class Real, class F, class RandomNumberGenerator = std::mt19937_64, class Policy = boost::math::policies::policy<>>
33 class naive_monte_carlo
34 {
35 public:
36 naive_monte_carlo(const F& integrand,
37 std::vector<std::pair<Real, Real>> const & bounds,
38 Real error_goal,
39 bool singular = true,
40 uint64_t threads = std::thread::hardware_concurrency(),
41 uint64_t seed = 0): m_num_threads{threads}, m_seed{seed}
42 {
43 using std::numeric_limits;
44 using std::sqrt;
45 uint64_t n = bounds.size();
46 m_lbs.resize(n);
47 m_dxs.resize(n);
48 m_limit_types.resize(n);
49 m_volume = 1;
50 static const char* function = "boost::math::quadrature::naive_monte_carlo<%1%>";
51 for (uint64_t i = 0; i < n; ++i)
52 {
53 if (bounds[i].second <= bounds[i].first)
54 {
55 boost::math::policies::raise_domain_error(function, "The upper bound is <= the lower bound.\n", bounds[i].second, Policy());
56 return;
57 }
58 if (bounds[i].first == -numeric_limits<Real>::infinity())
59 {
60 if (bounds[i].second == numeric_limits<Real>::infinity())
61 {
62 m_limit_types[i] = detail::limit_classification::DOUBLE_INFINITE;
63 }
64 else
65 {
66 m_limit_types[i] = detail::limit_classification::LOWER_BOUND_INFINITE;
67 // Ok ok this is bad to use the second bound as the lower limit and then reflect.
68 m_lbs[i] = bounds[i].second;
69 m_dxs[i] = numeric_limits<Real>::quiet_NaN();
70 }
71 }
72 else if (bounds[i].second == numeric_limits<Real>::infinity())
73 {
74 m_limit_types[i] = detail::limit_classification::UPPER_BOUND_INFINITE;
75 if (singular)
76 {
77 // I've found that it's easier to sample on a closed set and perturb the boundary
78 // than to try to sample very close to the boundary.
79 m_lbs[i] = std::nextafter(bounds[i].first, (std::numeric_limits<Real>::max)());
80 }
81 else
82 {
83 m_lbs[i] = bounds[i].first;
84 }
85 m_dxs[i] = numeric_limits<Real>::quiet_NaN();
86 }
87 else
88 {
89 m_limit_types[i] = detail::limit_classification::FINITE;
90 if (singular)
91 {
92 if (bounds[i].first == 0)
93 {
94 m_lbs[i] = std::numeric_limits<Real>::epsilon();
95 }
96 else
97 {
98 m_lbs[i] = std::nextafter(bounds[i].first, (std::numeric_limits<Real>::max)());
99 }
100
101 m_dxs[i] = std::nextafter(bounds[i].second, std::numeric_limits<Real>::lowest()) - m_lbs[i];
102 }
103 else
104 {
105 m_lbs[i] = bounds[i].first;
106 m_dxs[i] = bounds[i].second - bounds[i].first;
107 }
108 m_volume *= m_dxs[i];
109 }
110 }
111
112 m_integrand = [this, &integrand](std::vector<Real> & x)->Real
113 {
114 Real coeff = m_volume;
115 for (uint64_t i = 0; i < x.size(); ++i)
116 {
117 // Variable transformation are listed at:
118 // https://en.wikipedia.org/wiki/Numerical_integration
119 // However, we've made some changes to these so that we can evaluate on a compact domain.
120 if (m_limit_types[i] == detail::limit_classification::FINITE)
121 {
122 x[i] = m_lbs[i] + x[i]*m_dxs[i];
123 }
124 else if (m_limit_types[i] == detail::limit_classification::UPPER_BOUND_INFINITE)
125 {
126 Real t = x[i];
127 Real z = 1/(1 + numeric_limits<Real>::epsilon() - t);
128 coeff *= (z*z)*(1 + numeric_limits<Real>::epsilon());
129 x[i] = m_lbs[i] + t*z;
130 }
131 else if (m_limit_types[i] == detail::limit_classification::LOWER_BOUND_INFINITE)
132 {
133 Real t = x[i];
134 Real z = 1/(t+sqrt((numeric_limits<Real>::min)()));
135 coeff *= (z*z);
136 x[i] = m_lbs[i] + (t-1)*z;
137 }
138 else
139 {
140 Real t1 = 1/(1+numeric_limits<Real>::epsilon() - x[i]);
141 Real t2 = 1/(x[i]+numeric_limits<Real>::epsilon());
142 x[i] = (2*x[i]-1)*t1*t2/4;
143 coeff *= (t1*t1+t2*t2)/4;
144 }
145 }
146 return coeff*integrand(x);
147 };
148
149 // If we don't do a single function call in the constructor,
150 // we can't do a restart.
151 std::vector<Real> x(m_lbs.size());
152
153 // If the seed is zero, that tells us to choose a random seed for the user:
154 if (seed == 0)
155 {
156 std::random_device rd;
157 seed = rd();
158 }
159
160 RandomNumberGenerator gen(seed);
161 Real inv_denom = 1/static_cast<Real>(((gen.max)()-(gen.min)()));
162
163 m_num_threads = (std::max)(m_num_threads, (uint64_t) 1);
164 m_thread_calls.reset(new boost::atomic<uint64_t>[threads]);
165 m_thread_Ss.reset(new boost::atomic<Real>[threads]);
166 m_thread_averages.reset(new boost::atomic<Real>[threads]);
167
168 Real avg = 0;
169 for (uint64_t i = 0; i < m_num_threads; ++i)
170 {
171 for (uint64_t j = 0; j < m_lbs.size(); ++j)
172 {
173 x[j] = (gen()-(gen.min)())*inv_denom;
174 }
175 Real y = m_integrand(x);
176 m_thread_averages[i] = y; // relaxed store
177 m_thread_calls[i] = 1;
178 m_thread_Ss[i] = 0;
179 avg += y;
180 }
181 avg /= m_num_threads;
182 m_avg = avg; // relaxed store
183
184 m_error_goal = error_goal; // relaxed store
185 m_start = std::chrono::system_clock::now();
186 m_done = false; // relaxed store
187 m_total_calls = m_num_threads; // relaxed store
188 m_variance = (numeric_limits<Real>::max)();
189 }
190
191 std::future<Real> integrate()
192 {
193 // Set done to false in case we wish to restart:
194 m_done.store(false); // relaxed store, no worker threads yet
195 m_start = std::chrono::system_clock::now();
196 return std::async(std::launch::async,
197 &naive_monte_carlo::m_integrate, this);
198 }
199
200 void cancel()
201 {
202 // If seed = 0 (meaning have the routine pick the seed), this leaves the seed the same.
203 // If seed != 0, then the seed is changed, so a restart doesn't do the exact same thing.
204 m_seed = m_seed*m_seed;
205 m_done = true; // relaxed store, worker threads will get the message eventually
206 // Make sure the error goal is infinite, because otherwise we'll loop when we do the final error goal check:
207 m_error_goal = (std::numeric_limits<Real>::max)();
208 }
209
210 Real variance() const
211 {
212 return m_variance.load();
213 }
214
215 Real current_error_estimate() const
216 {
217 using std::sqrt;
218 //
219 // There is a bug here: m_variance and m_total_calls get updated asynchronously
220 // and may be out of synch when we compute the error estimate, not sure if it matters though...
221 //
222 return sqrt(m_variance.load()/m_total_calls.load());
223 }
224
225 std::chrono::duration<Real> estimated_time_to_completion() const
226 {
227 auto now = std::chrono::system_clock::now();
228 std::chrono::duration<Real> elapsed_seconds = now - m_start;
229 Real r = this->current_error_estimate()/m_error_goal.load(); // relaxed load
230 if (r*r <= 1) {
231 return 0*elapsed_seconds;
232 }
233 return (r*r - 1)*elapsed_seconds;
234 }
235
236 void update_target_error(Real new_target_error)
237 {
238 m_error_goal = new_target_error; // relaxed store
239 }
240
241 Real progress() const
242 {
243 Real r = m_error_goal.load()/this->current_error_estimate(); // relaxed load
244 if (r*r >= 1)
245 {
246 return 1;
247 }
248 return r*r;
249 }
250
251 Real current_estimate() const
252 {
253 return m_avg.load();
254 }
255
256 uint64_t calls() const
257 {
258 return m_total_calls.load(); // relaxed load
259 }
260
261 private:
262
263 Real m_integrate()
264 {
265 uint64_t seed;
266 // If the user tells us to pick a seed, pick a seed:
267 if (m_seed == 0)
268 {
269 std::random_device rd;
270 seed = rd();
271 }
272 else // use the seed we are given:
273 {
274 seed = m_seed;
275 }
276 RandomNumberGenerator gen(seed);
277 int max_repeat_tries = 5;
278 do{
279
280 if (max_repeat_tries < 5)
281 {
282 m_done = false;
283
284 #ifdef BOOST_NAIVE_MONTE_CARLO_DEBUG_FAILURES
285 std::cout << "Failed to achieve required tolerance first time through..\n";
286 std::cout << " variance = " << m_variance << std::endl;
287 std::cout << " average = " << m_avg << std::endl;
288 std::cout << " total calls = " << m_total_calls << std::endl;
289
290 for (std::size_t i = 0; i < m_num_threads; ++i)
291 std::cout << " thread_calls[" << i << "] = " << m_thread_calls[i] << std::endl;
292 for (std::size_t i = 0; i < m_num_threads; ++i)
293 std::cout << " thread_averages[" << i << "] = " << m_thread_averages[i] << std::endl;
294 for (std::size_t i = 0; i < m_num_threads; ++i)
295 std::cout << " thread_Ss[" << i << "] = " << m_thread_Ss[i] << std::endl;
296 #endif
297 }
298
299 std::vector<std::thread> threads(m_num_threads);
300 for (uint64_t i = 0; i < threads.size(); ++i)
301 {
302 threads[i] = std::thread(&naive_monte_carlo::m_thread_monte, this, i, gen());
303 }
304 do {
305 std::this_thread::sleep_for(std::chrono::milliseconds(100));
306 uint64_t total_calls = 0;
307 for (uint64_t i = 0; i < m_num_threads; ++i)
308 {
309 uint64_t t_calls = m_thread_calls[i].load(boost::memory_order::consume);
310 total_calls += t_calls;
311 }
312 Real variance = 0;
313 Real avg = 0;
314 for (uint64_t i = 0; i < m_num_threads; ++i)
315 {
316 uint64_t t_calls = m_thread_calls[i].load(boost::memory_order::consume);
317 // Will this overflow? Not hard to remove . . .
318 avg += m_thread_averages[i].load(boost::memory_order::relaxed)*((Real)t_calls / (Real)total_calls);
319 variance += m_thread_Ss[i].load(boost::memory_order::relaxed);
320 }
321 m_avg.store(avg, boost::memory_order::release);
322 m_variance.store(variance / (total_calls - 1), boost::memory_order::release);
323 m_total_calls = total_calls; // relaxed store, it's just for user feedback
324 // Allow cancellation:
325 if (m_done) // relaxed load
326 {
327 break;
328 }
329 } while (m_total_calls < 2048 || this->current_error_estimate() > m_error_goal.load(boost::memory_order::consume));
330 // Error bound met; signal the threads:
331 m_done = true; // relaxed store, threads will get the message in the end
332 std::for_each(threads.begin(), threads.end(),
333 std::mem_fn(&std::thread::join));
334 if (m_exception)
335 {
336 std::rethrow_exception(m_exception);
337 }
338 // Incorporate their work into the final estimate:
339 uint64_t total_calls = 0;
340 for (uint64_t i = 0; i < m_num_threads; ++i)
341 {
342 uint64_t t_calls = m_thread_calls[i].load(boost::memory_order::consume);
343 total_calls += t_calls;
344 }
345 Real variance = 0;
346 Real avg = 0;
347
348 for (uint64_t i = 0; i < m_num_threads; ++i)
349 {
350 uint64_t t_calls = m_thread_calls[i].load(boost::memory_order::consume);
351 // Averages weighted by the number of calls the thread made:
352 avg += m_thread_averages[i].load(boost::memory_order::relaxed)*((Real)t_calls / (Real)total_calls);
353 variance += m_thread_Ss[i].load(boost::memory_order::relaxed);
354 }
355 m_avg.store(avg, boost::memory_order::release);
356 m_variance.store(variance / (total_calls - 1), boost::memory_order::release);
357 m_total_calls = total_calls; // relaxed store, this is just user feedback
358
359 // Sometimes, the master will observe the variance at a very "good" (or bad?) moment,
360 // Then the threads proceed to find the variance is much greater by the time they hear the message to stop.
361 // This *WOULD* make sure that the final error estimate is within the error bounds.
362 }
363 while ((--max_repeat_tries >= 0) && (this->current_error_estimate() > m_error_goal));
364
365 return m_avg.load(boost::memory_order::consume);
366 }
367
368 void m_thread_monte(uint64_t thread_index, uint64_t seed)
369 {
370 using std::numeric_limits;
371 try
372 {
373 std::vector<Real> x(m_lbs.size());
374 RandomNumberGenerator gen(seed);
375 Real inv_denom = (Real) 1/(Real)( (gen.max)() - (gen.min)() );
376 Real M1 = m_thread_averages[thread_index].load(boost::memory_order::consume);
377 Real S = m_thread_Ss[thread_index].load(boost::memory_order::consume);
378 // Kahan summation is required or the value of the integrand will go on a random walk during long computations.
379 // See the implementation discussion.
380 // The idea is that the unstabilized additions have error sigma(f)/sqrt(N) + epsilon*N, which diverges faster than it converges!
381 // Kahan summation turns this to sigma(f)/sqrt(N) + epsilon^2*N, and the random walk occurs on a timescale of 10^14 years (on current hardware)
382 Real compensator = 0;
383 uint64_t k = m_thread_calls[thread_index].load(boost::memory_order::consume);
384 while (!m_done) // relaxed load
385 {
386 int j = 0;
387 // If we don't have a certain number of calls before an update, we can easily terminate prematurely
388 // because the variance estimate is way too low. This magic number is a reasonable compromise, as 1/sqrt(2048) = 0.02,
389 // so it should recover 2 digits if the integrand isn't poorly behaved, and if it is, it should discover that before premature termination.
390 // Of course if the user has 64 threads, then this number is probably excessive.
391 int magic_calls_before_update = 2048;
392 while (j++ < magic_calls_before_update)
393 {
394 for (uint64_t i = 0; i < m_lbs.size(); ++i)
395 {
396 x[i] = (gen() - (gen.min)())*inv_denom;
397 }
398 Real f = m_integrand(x);
399 using std::isfinite;
400 if (!isfinite(f))
401 {
402 // The call to m_integrand transform x, so this error message states the correct node.
403 std::stringstream os;
404 os << "Your integrand was evaluated at {";
405 for (uint64_t i = 0; i < x.size() -1; ++i)
406 {
407 os << x[i] << ", ";
408 }
409 os << x[x.size() -1] << "}, and returned " << f << std::endl;
410 static const char* function = "boost::math::quadrature::naive_monte_carlo<%1%>";
411 boost::math::policies::raise_domain_error(function, os.str().c_str(), /*this is a dummy arg to make it compile*/ 7.2, Policy());
412 }
413 ++k;
414 Real term = (f - M1)/k;
415 Real y1 = term - compensator;
416 Real M2 = M1 + y1;
417 compensator = (M2 - M1) - y1;
418 S += (f - M1)*(f - M2);
419 M1 = M2;
420 }
421 m_thread_averages[thread_index].store(M1, boost::memory_order::release);
422 m_thread_Ss[thread_index].store(S, boost::memory_order::release);
423 m_thread_calls[thread_index].store(k, boost::memory_order::release);
424 }
425 }
426 catch (...)
427 {
428 // Signal the other threads that the computation is ruined:
429 m_done = true; // relaxed store
430 m_exception = std::current_exception();
431 }
432 }
433
434 std::function<Real(std::vector<Real> &)> m_integrand;
435 uint64_t m_num_threads;
436 uint64_t m_seed;
437 boost::atomic<Real> m_error_goal;
438 boost::atomic<bool> m_done;
439 std::vector<Real> m_lbs;
440 std::vector<Real> m_dxs;
441 std::vector<detail::limit_classification> m_limit_types;
442 Real m_volume;
443 boost::atomic<uint64_t> m_total_calls;
444 // I wanted these to be vectors rather than maps,
445 // but you can't resize a vector of atomics.
446 std::unique_ptr<boost::atomic<uint64_t>[]> m_thread_calls;
447 boost::atomic<Real> m_variance;
448 std::unique_ptr<boost::atomic<Real>[]> m_thread_Ss;
449 boost::atomic<Real> m_avg;
450 std::unique_ptr<boost::atomic<Real>[]> m_thread_averages;
451 std::chrono::time_point<std::chrono::system_clock> m_start;
452 std::exception_ptr m_exception;
453 };
454
455 }}}
456 #endif