]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/libs/math/example/geometric_examples.cpp
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / boost / libs / math / example / geometric_examples.cpp
CommitLineData
7c673cae
FG
1// geometric_examples.cpp
2
3// Copyright Paul A. Bristow 2010.
4
5// Use, modification and distribution are subject to the
6// Boost Software License, Version 1.0.
7// (See accompanying file LICENSE_1_0.txt
8// or copy at http://www.boost.org/LICENSE_1_0.txt)
9
10// This file is written to be included from a Quickbook .qbk document.
11// It can still be compiled by the C++ compiler, and run.
12// Any output can also be added here as comment or included or pasted in elsewhere.
13// Caution: this file contains Quickbook markup as well as code
14// and comments: don't change any of the special comment markups!
15
16// Examples of using the geometric distribution.
17
18//[geometric_eg1_1
19/*`
20For this example, we will opt to #define two macros to control
21the error and discrete handling policies.
22For this simple example, we want to avoid throwing
23an exception (the default policy) and just return infinity.
24We want to treat the distribution as if it was continuous,
25so we choose a discrete_quantile policy of real,
26rather than the default policy integer_round_outwards.
27*/
28#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
29#define BOOST_MATH_DISCRETE_QUANTILE_POLICY real
30/*`
31[caution It is vital to #include distributions etc *after* the above #defines]
32After that we need some includes to provide easy access to the negative binomial distribution,
33and we need some std library iostream, of course.
34*/
35#include <boost/math/distributions/geometric.hpp>
36 // for geometric_distribution
37 using ::boost::math::geometric_distribution; //
38 using ::boost::math::geometric; // typedef provides default type is double.
39 using ::boost::math::pdf; // Probability mass function.
40 using ::boost::math::cdf; // Cumulative density function.
41 using ::boost::math::quantile;
42
43#include <boost/math/distributions/negative_binomial.hpp>
44 // for negative_binomial_distribution
45 using boost::math::negative_binomial; // typedef provides default type is double.
46
47#include <boost/math/distributions/normal.hpp>
48 // for negative_binomial_distribution
49 using boost::math::normal; // typedef provides default type is double.
50
51#include <iostream>
52 using std::cout; using std::endl;
53 using std::noshowpoint; using std::fixed; using std::right; using std::left;
54#include <iomanip>
55 using std::setprecision; using std::setw;
56
57#include <limits>
58 using std::numeric_limits;
59//] [geometric_eg1_1]
60
61int main()
62{
63 cout <<"Geometric distribution example" << endl;
64 cout << endl;
65
66 cout.precision(4); // But only show a few for this example.
67 try
68 {
69//[geometric_eg1_2
70/*`
71It is always sensible to use try and catch blocks because defaults policies are to
72throw an exception if anything goes wrong.
73
74Simple try'n'catch blocks (see below) will ensure that you get a
75helpful error message instead of an abrupt (and silent) program abort.
76
77[h6 Throwing a dice]
78The Geometric distribution describes the probability (/p/) of a number of failures
79to get the first success in /k/ Bernoulli trials.
80(A [@http://en.wikipedia.org/wiki/Bernoulli_distribution Bernoulli trial]
81is one with only two possible outcomes, success of failure,
82and /p/ is the probability of success).
83
84Suppose an 'fair' 6-face dice is thrown repeatedly:
85*/
86 double success_fraction = 1./6; // success_fraction (p) = 0.1666
87 // (so failure_fraction is 1 - success_fraction = 5./6 = 1- 0.1666 = 0.8333)
88
89/*`If the dice is thrown repeatedly until the *first* time a /three/ appears.
f67539c2 90The probability distribution of the number of times it is thrown *not* getting a /three/
7c673cae
FG
91 (/not-a-threes/ number of failures to get a /three/)
92is a geometric distribution with the success_fraction = 1/6 = 0.1666[recur].
93
94We therefore start by constructing a geometric distribution
95with the one parameter success_fraction, the probability of success.
96*/
97 geometric g6(success_fraction); // type double by default.
98/*`
99To confirm, we can echo the success_fraction parameter of the distribution.
100*/
101 cout << "success fraction of a six-sided dice is " << g6.success_fraction() << endl;
102/*`So the probability of getting a three at the first throw (zero failures) is
103*/
104 cout << pdf(g6, 0) << endl; // 0.1667
105 cout << cdf(g6, 0) << endl; // 0.1667
106/*`Note that the cdf and pdf are identical because the is only one throw.
107If we want the probability of getting the first /three/ on the 2nd throw:
108*/
109 cout << pdf(g6, 1) << endl; // 0.1389
110
111/*`If we want the probability of getting the first /three/ on the 1st or 2nd throw
112(allowing one failure):
113*/
114 cout << "pdf(g6, 0) + pdf(g6, 1) = " << pdf(g6, 0) + pdf(g6, 1) << endl;
115/*`Or more conveniently, and more generally,
116we can use the Cumulative Distribution Function CDF.*/
117
118 cout << "cdf(g6, 1) = " << cdf(g6, 1) << endl; // 0.3056
119
120/*`If we allow many more (12) throws, the probability of getting our /three/ gets very high:*/
121 cout << "cdf(g6, 12) = " << cdf(g6, 12) << endl; // 0.9065 or 90% probability.
122/*`If we want to be much more confident, say 99%,
123we can estimate the number of throws to be this sure
124using the inverse or quantile.
125*/
126 cout << "quantile(g6, 0.99) = " << quantile(g6, 0.99) << endl; // 24.26
127/*`Note that the value returned is not an integer:
128if you want an integer result you should use either floor, round or ceil functions,
129or use the policies mechanism.
130
131See __understand_dis_quant.
132
133The geometric distribution is related to the negative binomial
134__spaces `negative_binomial_distribution(RealType r, RealType p);` with parameter /r/ = 1.
135So we could get the same result using the negative binomial,
136but using the geometric the results will be faster, and may be more accurate.
137*/
138 negative_binomial nb(1, success_fraction);
139 cout << pdf(nb, 1) << endl; // 0.1389
140 cout << cdf(nb, 1) << endl; // 0.3056
141/*`We could also the complement to express the required probability
142as 1 - 0.99 = 0.01 (and get the same result):
143*/
144 cout << "quantile(complement(g6, 1 - p)) " << quantile(complement(g6, 0.01)) << endl; // 24.26
145/*`
146Note too that Boost.Math geometric distribution is implemented as a continuous function.
147Unlike other implementations (for example R) it *uses* the number of failures as a *real* parameter,
148not as an integer. If you want this integer behaviour, you may need to enforce this by
149rounding the parameter you pass, probably rounding down, to the nearest integer.
150For example, R returns the success fraction probability for all values of failures
151from 0 to 0.999999 thus:
152[pre
153__spaces R> formatC(pgeom(0.0001,0.5, FALSE), digits=17) " 0.5"
154] [/pre]
155So in Boost.Math the equivalent is
156*/
157 geometric g05(0.5); // Probability of success = 0.5 or 50%
158 // Output all potentially significant digits for the type, here double.
159
160#ifdef BOOST_NO_CXX11_NUMERIC_LIMITS
161 int max_digits10 = 2 + (boost::math::policies::digits<double, boost::math::policies::policy<> >() * 30103UL) / 100000UL;
162 cout << "BOOST_NO_CXX11_NUMERIC_LIMITS is defined" << endl;
163#else
164 int max_digits10 = std::numeric_limits<double>::max_digits10;
165#endif
166 cout << "Show all potentially significant decimal digits std::numeric_limits<double>::max_digits10 = "
167 << max_digits10 << endl;
168 cout.precision(max_digits10); //
169
170 cout << cdf(g05, 0.0001) << endl; // returns 0.5000346561579232, not exact 0.5.
171/*`To get the R discrete behaviour, you simply need to round with,
172for example, the `floor` function.
173*/
174 cout << cdf(g05, floor(0.0001)) << endl; // returns exactly 0.5
175/*`
176[pre
177`> formatC(pgeom(0.9999999,0.5, FALSE), digits=17) [1] " 0.25"`
178`> formatC(pgeom(1.999999,0.5, FALSE), digits=17)[1] " 0.25" k = 1`
179`> formatC(pgeom(1.9999999,0.5, FALSE), digits=17)[1] "0.12500000000000003" k = 2`
180] [/pre]
181shows that R makes an arbitrary round-up decision at about 1e7 from the next integer above.
182This may be convenient in practice, and could be replicated in C++ if desired.
183
184[h6 Surveying customers to find one with a faulty product]
185A company knows from warranty claims that 2% of their products will be faulty,
186so the 'success_fraction' of finding a fault is 0.02.
187It wants to interview a purchaser of faulty products to assess their 'user experience'.
188
189To estimate how many customers they will probably need to contact
190in order to find one who has suffered from the fault,
191we first construct a geometric distribution with probability 0.02,
192and then chose a confidence, say 80%, 95%, or 99% to finding a customer with a fault.
193Finally, we probably want to round up the result to the integer above using the `ceil` function.
194(We could also use a policy, but that is hardly worthwhile for this simple application.)
195
196(This also assumes that each customer only buys one product:
197if customers bought more than one item,
198the probability of finding a customer with a fault obviously improves.)
199*/
200 cout.precision(5);
201 geometric g(0.02); // On average, 2 in 100 products are faulty.
202 double c = 0.95; // 95% confidence.
203 cout << " quantile(g, " << c << ") = " << quantile(g, c) << endl;
204
205 cout << "To be " << c * 100
206 << "% confident of finding we customer with a fault, need to survey "
207 << ceil(quantile(g, c)) << " customers." << endl; // 148
208 c = 0.99; // Very confident.
209 cout << "To be " << c * 100
210 << "% confident of finding we customer with a fault, need to survey "
211 << ceil(quantile(g, c)) << " customers." << endl; // 227
212 c = 0.80; // Only reasonably confident.
213 cout << "To be " << c * 100
214 << "% confident of finding we customer with a fault, need to survey "
215 << ceil(quantile(g, c)) << " customers." << endl; // 79
216
217/*`[h6 Basket Ball Shooters]
218According to Wikipedia, average pro basket ball players get
219[@http://en.wikipedia.org/wiki/Free_throw free throws]
220in the baskets 70 to 80 % of the time,
221but some get as high as 95%, and others as low as 50%.
222Suppose we want to compare the probabilities
223of failing to get a score only on the first or on the fifth shot?
224To start we will consider the average shooter, say 75%.
225So we construct a geometric distribution
226with success_fraction parameter 75/100 = 0.75.
227*/
228 cout.precision(2);
229 geometric gav(0.75); // Shooter averages 7.5 out of 10 in the basket.
230/*`What is probability of getting 1st try in the basket, that is with no failures? */
231 cout << "Probability of score on 1st try = " << pdf(gav, 0) << endl; // 0.75
232/*`This is, of course, the success_fraction probability 75%.
233What is the probability that the shooter only scores on the fifth shot?
234So there are 5-1 = 4 failures before the first success.*/
235 cout << "Probability of score on 5th try = " << pdf(gav, 4) << endl; // 0.0029
236/*`Now compare this with the poor and the best players success fraction.
237We need to constructing new distributions with the different success fractions,
238and then get the corresponding probability density functions values:
239*/
240 geometric gbest(0.95);
241 cout << "Probability of score on 5th try = " << pdf(gbest, 4) << endl; // 5.9e-6
242 geometric gmediocre(0.50);
243 cout << "Probability of score on 5th try = " << pdf(gmediocre, 4) << endl; // 0.031
244/*`So we can see the very much smaller chance (0.000006) of 4 failures by the best shooters,
245compared to the 0.03 of the mediocre.*/
246
247/*`[h6 Estimating failures]
248Of course one man's failure is an other man's success.
249So a fault can be defined as a 'success'.
250
251If a fault occurs once after 100 flights, then one might naively say
252that the risk of fault is obviously 1 in 100 = 1/100, a probability of 0.01.
253
254This is the best estimate we can make, but while it is the truth,
255it is not the whole truth,
256for it hides the big uncertainty when estimating from a single event.
257"One swallow doesn't make a summer."
258To show the magnitude of the uncertainty, the geometric
259(or the negative binomial) distribution can be used.
260
261If we chose the popular 95% confidence in the limits, corresponding to an alpha of 0.05,
262because we are calculating a two-sided interval, we must divide alpha by two.
263*/
264 double alpha = 0.05;
265 double k = 100; // So frequency of occurrence is 1/100.
266 cout << "Probability is failure is " << 1/k << endl;
267 double t = geometric::find_lower_bound_on_p(k, alpha/2);
268 cout << "geometric::find_lower_bound_on_p(" << int(k) << ", " << alpha/2 << ") = "
269 << t << endl; // 0.00025
270 t = geometric::find_upper_bound_on_p(k, alpha/2);
271 cout << "geometric::find_upper_bound_on_p(" << int(k) << ", " << alpha/2 << ") = "
272 << t << endl; // 0.037
273/*`So while we estimate the probability is 0.01, it might lie between 0.0003 and 0.04.
274Even if we relax our confidence to alpha = 90%, the bounds only contract to 0.0005 and 0.03.
275And if we require a high confidence, they widen to 0.00005 to 0.05.
276*/
277 alpha = 0.1; // 90% confidence.
278 t = geometric::find_lower_bound_on_p(k, alpha/2);
279 cout << "geometric::find_lower_bound_on_p(" << int(k) << ", " << alpha/2 << ") = "
280 << t << endl; // 0.0005
281 t = geometric::find_upper_bound_on_p(k, alpha/2);
282 cout << "geometric::find_upper_bound_on_p(" << int(k) << ", " << alpha/2 << ") = "
283 << t << endl; // 0.03
284
285 alpha = 0.01; // 99% confidence.
286 t = geometric::find_lower_bound_on_p(k, alpha/2);
287 cout << "geometric::find_lower_bound_on_p(" << int(k) << ", " << alpha/2 << ") = "
288 << t << endl; // 5e-005
289 t = geometric::find_upper_bound_on_p(k, alpha/2);
290 cout << "geometric::find_upper_bound_on_p(" << int(k) << ", " << alpha/2 << ") = "
291 << t << endl; // 0.052
292/*`In real life, there will usually be more than one event (fault or success),
f67539c2 293when the negative binomial, which has the necessary extra parameter, will be needed.
7c673cae
FG
294*/
295
296/*`As noted above, using a catch block is always a good idea,
297even if you hope not to use it!
298*/
299 }
300 catch(const std::exception& e)
301 { // Since we have set an overflow policy of ignore_error,
302 // an overflow exception should never be thrown.
303 std::cout << "\nMessage from thrown exception was:\n " << e.what() << std::endl;
304/*`
305For example, without a ignore domain error policy,
306if we asked for ``pdf(g, -1)`` for example,
307we would get an unhelpful abort, but with a catch:
308[pre
309Message from thrown exception was:
310 Error in function boost::math::pdf(const exponential_distribution<double>&, double):
311 Number of failures argument is -1, but must be >= 0 !
312] [/pre]
313*/
314//] [/ geometric_eg1_2]
315 }
316 return 0;
317} // int main()
318
319
320/*
321Output is:
322
323 Geometric distribution example
324
325 success fraction of a six-sided dice is 0.1667
326 0.1667
327 0.1667
328 0.1389
329 pdf(g6, 0) + pdf(g6, 1) = 0.3056
330 cdf(g6, 1) = 0.3056
331 cdf(g6, 12) = 0.9065
332 quantile(g6, 0.99) = 24.26
333 0.1389
334 0.3056
335 quantile(complement(g6, 1 - p)) 24.26
336 0.5000346561579232
337 0.5
338 quantile(g, 0.95) = 147.28
339 To be 95% confident of finding we customer with a fault, need to survey 148 customers.
340 To be 99% confident of finding we customer with a fault, need to survey 227 customers.
341 To be 80% confident of finding we customer with a fault, need to survey 79 customers.
342 Probability of score on 1st try = 0.75
343 Probability of score on 5th try = 0.0029
344 Probability of score on 5th try = 5.9e-006
345 Probability of score on 5th try = 0.031
346 Probability is failure is 0.01
347 geometric::find_lower_bound_on_p(100, 0.025) = 0.00025
348 geometric::find_upper_bound_on_p(100, 0.025) = 0.037
349 geometric::find_lower_bound_on_p(100, 0.05) = 0.00051
350 geometric::find_upper_bound_on_p(100, 0.05) = 0.03
351 geometric::find_lower_bound_on_p(100, 0.005) = 5e-005
352 geometric::find_upper_bound_on_p(100, 0.005) = 0.052
353
354*/
355
356
357
358
359
360
361
362
363
364