ceph/src/boost/libs/math/example/geometric_examples.cpp

   1 // geometric_examples.cpp
   2
   3 // Copyright Paul A. Bristow 2010.
   4
   5 // Use, modification and distribution are subject to the
   6 // Boost Software License, Version 1.0.
   7 // (See accompanying file LICENSE_1_0.txt
   8 // or copy at http://www.boost.org/LICENSE_1_0.txt)
   9
  10 // This file is written to be included from a Quickbook .qbk document.
  11 // It can still be compiled by the C++ compiler, and run.
  12 // Any output can also be added here as comment or included or pasted in elsewhere.
  13 // Caution: this file contains Quickbook markup as well as code
  14 // and comments: don't change any of the special comment markups!
  15
  16 // Examples of using the geometric distribution.
  17
  18 //[geometric_eg1_1
  19 /*`
  20 For this example, we will opt to #define two macros to control
  21 the error and discrete handling policies.
  22 For this simple example, we want to avoid throwing
  23 an exception (the default policy) and just return infinity.
  24 We want to treat the distribution as if it was continuous,
  25 so we choose a discrete_quantile policy of real,
  26 rather than the default policy integer_round_outwards.
  27 */
  28 #define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
  29 #define BOOST_MATH_DISCRETE_QUANTILE_POLICY real
  30 /*`
  31 [caution It is vital to #include distributions etc *after* the above #defines]
  32 After that we need some includes to provide easy access to the negative binomial distribution,
  33 and we need some std library iostream, of course.
  34 */
  35 #include <boost/math/distributions/geometric.hpp>
  36   // for geometric_distribution
  37   using ::boost::math::geometric_distribution; //
  38   using ::boost::math::geometric; // typedef provides default type is double.
  39   using  ::boost::math::pdf; // Probability mass function.
  40   using  ::boost::math::cdf; // Cumulative density function.
  41   using  ::boost::math::quantile;
  42
  43 #include <boost/math/distributions/negative_binomial.hpp>
  44   // for negative_binomial_distribution
  45   using boost::math::negative_binomial; // typedef provides default type is double.
  46
  47 #include <boost/math/distributions/normal.hpp>
  48   // for negative_binomial_distribution
  49   using boost::math::normal; // typedef provides default type is double.
  50
  51 #include <iostream>
  52   using std::cout; using std::endl;
  53   using std::noshowpoint; using std::fixed; using std::right; using std::left;
  54 #include <iomanip>
  55   using std::setprecision; using std::setw;
  56
  57 #include <limits>
  58   using std::numeric_limits;
  59 //] [geometric_eg1_1]
  60
  61 int main()
  62 {
  63   cout <<"Geometric distribution example" << endl;
  64   cout << endl;
  65
  66   cout.precision(4); // But only show a few for this example.
  67   try
  68   {
  69 //[geometric_eg1_2
  70 /*`
  71 It is always sensible to use try and catch blocks because defaults policies are to
  72 throw an exception if anything goes wrong.
  73
  74 Simple try'n'catch blocks (see below) will ensure that you get a
  75 helpful error message instead of an abrupt (and silent) program abort.
  76
  77 [h6 Throwing a dice]
  78 The Geometric distribution describes the probability (/p/) of a number of failures
  79 to get the first success in /k/ Bernoulli trials.
  80 (A [@http://en.wikipedia.org/wiki/Bernoulli_distribution Bernoulli trial]
  81 is one with only two possible outcomes, success of failure,
  82 and /p/ is the probability of success).
  83
  84 Suppose an 'fair' 6-face dice is thrown repeatedly:
  85 */
  86     double success_fraction = 1./6; // success_fraction (p) = 0.1666
  87     // (so failure_fraction is 1 - success_fraction = 5./6 = 1- 0.1666 = 0.8333)
  88
  89 /*`If the dice is thrown repeatedly until the *first* time a /three/ appears.
  90 The probability distribution of the number of times it is thrown *not* getting a /three/
  91  (/not-a-threes/ number of failures to get a /three/)
  92 is a geometric distribution with the success_fraction = 1/6 = 0.1666[recur].
  93
  94 We therefore start by constructing a geometric distribution
  95 with the one parameter success_fraction, the probability of success.
  96 */
  97     geometric g6(success_fraction); // type double by default.
  98 /*`
  99 To confirm, we can echo the success_fraction parameter of the distribution.
 100 */
 101     cout << "success fraction of a six-sided dice is " << g6.success_fraction() << endl;
 102 /*`So the probability of getting a three at the first throw (zero failures) is
 103 */
 104     cout << pdf(g6, 0) << endl; // 0.1667
 105     cout << cdf(g6, 0) << endl; // 0.1667
 106 /*`Note that the cdf and pdf are identical because the is only one throw.
 107 If we want the probability of getting the first /three/ on the 2nd throw:
 108 */
 109     cout << pdf(g6, 1) << endl; // 0.1389
 110
 111 /*`If we want the probability of getting the first /three/ on the 1st or 2nd throw
 112 (allowing one failure):
 113 */
 114     cout << "pdf(g6, 0) + pdf(g6, 1) = " << pdf(g6, 0) + pdf(g6, 1) << endl;
 115 /*`Or more conveniently, and more generally,
 116 we can use the Cumulative Distribution Function CDF.*/
 117
 118     cout << "cdf(g6, 1) = " << cdf(g6, 1) << endl; // 0.3056
 119
 120 /*`If we allow many more (12) throws, the probability of getting our /three/ gets very high:*/
 121     cout << "cdf(g6, 12) = " << cdf(g6, 12) << endl; // 0.9065 or 90% probability.
 122 /*`If we want to be much more confident, say 99%,
 123 we can estimate the number of throws to be this sure
 124 using the inverse or quantile.
 125 */
 126     cout << "quantile(g6, 0.99) = " << quantile(g6, 0.99) << endl; // 24.26
 127 /*`Note that the value returned is not an integer:
 128 if you want an integer result you should use either floor, round or ceil functions,
 129 or use the policies mechanism.
 130
 131 See __understand_dis_quant.
 132
 133 The geometric distribution is related to the negative binomial
 134 __spaces `negative_binomial_distribution(RealType r, RealType p);` with parameter /r/ = 1.
 135 So we could get the same result using the negative binomial,
 136 but using the geometric the results will be faster, and may be more accurate.
 137 */
 138     negative_binomial nb(1, success_fraction);
 139     cout << pdf(nb, 1) << endl; // 0.1389
 140     cout << cdf(nb, 1) << endl; // 0.3056
 141 /*`We could also the complement to express the required probability
 142 as 1 - 0.99 = 0.01 (and get the same result):
 143 */
 144     cout << "quantile(complement(g6, 1 - p))  " << quantile(complement(g6, 0.01)) << endl; // 24.26
 145 /*`
 146 Note too that Boost.Math geometric distribution is implemented as a continuous function.
 147 Unlike other implementations (for example R) it *uses* the number of failures as a *real* parameter,
 148 not as an integer. If you want this integer behaviour, you may need to enforce this by
 149 rounding the parameter you pass, probably rounding down, to the nearest integer.
 150 For example, R returns the success fraction probability for all values of failures
 151 from 0 to 0.999999 thus:
 152 [pre
 153 __spaces R> formatC(pgeom(0.0001,0.5, FALSE), digits=17) "               0.5"
 154 ] [/pre]
 155 So in Boost.Math the equivalent is
 156 */
 157     geometric g05(0.5);  // Probability of success = 0.5 or 50%
 158     // Output all potentially significant digits for the type, here double.
 159
 160 #ifdef BOOST_NO_CXX11_NUMERIC_LIMITS
 161   int max_digits10 = 2 + (boost::math::policies::digits<double, boost::math::policies::policy<> >() * 30103UL) / 100000UL;
 162   cout << "BOOST_NO_CXX11_NUMERIC_LIMITS is defined" << endl;
 163 #else
 164   int max_digits10 = std::numeric_limits<double>::max_digits10;
 165 #endif
 166   cout << "Show all potentially significant decimal digits std::numeric_limits<double>::max_digits10 = "
 167     << max_digits10 << endl;
 168   cout.precision(max_digits10); //
 169
 170     cout << cdf(g05, 0.0001) << endl; // returns 0.5000346561579232, not exact 0.5.
 171 /*`To get the R discrete behaviour, you simply need to round with,
 172 for example, the `floor` function.
 173 */
 174     cout << cdf(g05, floor(0.0001)) << endl; // returns exactly 0.5
 175 /*`
 176 [pre
 177 `> formatC(pgeom(0.9999999,0.5, FALSE), digits=17) [1] "              0.25"`
 178 `> formatC(pgeom(1.999999,0.5, FALSE), digits=17)[1] "              0.25" k = 1`
 179 `> formatC(pgeom(1.9999999,0.5, FALSE), digits=17)[1] "0.12500000000000003" k = 2`
 180 ] [/pre]
 181 shows that R makes an arbitrary round-up decision at about 1e7 from the next integer above.
 182 This may be convenient in practice, and could be replicated in C++ if desired.
 183
 184 [h6 Surveying customers to find one with a faulty product]
 185 A company knows from warranty claims that 2% of their products will be faulty,
 186 so the 'success_fraction' of finding a fault is 0.02.
 187 It wants to interview a purchaser of faulty products to assess their 'user experience'.
 188
 189 To estimate how many customers they will probably need to contact
 190 in order to find one who has suffered from the fault,
 191 we first construct a geometric distribution with probability 0.02,
 192 and then chose a confidence, say 80%, 95%, or 99% to finding a customer with a fault.
 193 Finally, we probably want to round up the result to the integer above using the `ceil` function.
 194 (We could also use a policy, but that is hardly worthwhile for this simple application.)
 195
 196 (This also assumes that each customer only buys one product:
 197 if customers bought more than one item,
 198 the probability of finding a customer with a fault obviously improves.)
 199 */
 200     cout.precision(5);
 201     geometric g(0.02); // On average, 2 in 100 products are faulty.
 202     double c = 0.95; // 95% confidence.
 203     cout << " quantile(g, " << c << ") = " << quantile(g, c) << endl;
 204
 205     cout << "To be " << c * 100
 206       << "% confident of finding we customer with a fault, need to survey "
 207       <<  ceil(quantile(g, c)) << " customers." << endl; // 148
 208     c = 0.99; // Very confident.
 209     cout << "To be " << c * 100
 210       << "% confident of finding we customer with a fault, need to survey "
 211       <<  ceil(quantile(g, c)) << " customers." << endl; // 227
 212     c = 0.80; // Only reasonably confident.
 213     cout << "To be " << c * 100
 214       << "% confident of finding we customer with a fault, need to survey "
 215       <<  ceil(quantile(g, c)) << " customers." << endl; // 79
 216
 217 /*`[h6 Basket Ball Shooters]
 218 According to Wikipedia, average pro basket ball players get
 219 [@http://en.wikipedia.org/wiki/Free_throw free throws]
 220 in the baskets 70 to 80 % of the time,
 221 but some get as high as 95%, and others as low as 50%.
 222 Suppose we want to compare the probabilities
 223 of failing to get a score only on the first or on the fifth shot?
 224 To start we will consider the average shooter, say 75%.
 225 So we construct a geometric distribution
 226 with success_fraction parameter 75/100 = 0.75.
 227 */
 228     cout.precision(2);
 229     geometric gav(0.75); // Shooter averages 7.5 out of 10 in the basket.
 230 /*`What is probability of getting 1st try in the basket, that is with no failures? */
 231     cout << "Probability of score on 1st try = " << pdf(gav, 0) << endl; // 0.75
 232 /*`This is, of course, the success_fraction probability 75%.
 233 What is the probability that the shooter only scores on the fifth shot?
 234 So there are 5-1 = 4 failures before the first success.*/
 235     cout << "Probability of score on 5th try = " << pdf(gav, 4) << endl; // 0.0029
 236 /*`Now compare this with the poor and the best players success fraction.
 237 We need to constructing new distributions with the different success fractions,
 238 and then get the corresponding probability density functions values:
 239 */
 240     geometric gbest(0.95);
 241     cout << "Probability of score on 5th try = " << pdf(gbest, 4) << endl; // 5.9e-6
 242     geometric gmediocre(0.50);
 243     cout << "Probability of score on 5th try = " << pdf(gmediocre, 4) << endl; // 0.031
 244 /*`So we can see the very much smaller chance (0.000006) of 4 failures by the best shooters,
 245 compared to the 0.03 of the mediocre.*/
 246
 247 /*`[h6 Estimating failures]
 248 Of course one man's failure is an other man's success.
 249 So a fault can be defined as a 'success'.
 250
 251 If a fault occurs once after 100 flights, then one might naively say
 252 that the risk of fault is obviously 1 in 100 = 1/100, a probability of 0.01.
 253
 254 This is the best estimate we can make, but while it is the truth,
 255 it is not the whole truth,
 256 for it hides the big uncertainty when estimating from a single event.
 257 "One swallow doesn't make a summer."
 258 To show the magnitude of the uncertainty, the geometric
 259 (or the negative binomial) distribution can be used.
 260
 261 If we chose the popular 95% confidence in the limits, corresponding to an alpha of 0.05,
 262 because we are calculating a two-sided interval, we must divide alpha by two.
 263 */
 264     double alpha = 0.05;
 265     double k = 100; // So frequency of occurrence is 1/100.
 266     cout << "Probability is failure is " << 1/k << endl;
 267     double t = geometric::find_lower_bound_on_p(k, alpha/2);
 268     cout << "geometric::find_lower_bound_on_p(" << int(k) << ", " << alpha/2 << ") = "
 269       << t << endl; // 0.00025
 270     t = geometric::find_upper_bound_on_p(k, alpha/2);
 271     cout << "geometric::find_upper_bound_on_p(" << int(k) << ", " << alpha/2 << ") = "
 272       << t << endl; // 0.037
 273 /*`So while we estimate the probability is 0.01, it might lie between 0.0003 and 0.04.
 274 Even if we relax our confidence to alpha = 90%, the bounds only contract to 0.0005 and 0.03.
 275 And if we require a high confidence, they widen to 0.00005 to 0.05.
 276 */
 277     alpha = 0.1; // 90% confidence.
 278     t = geometric::find_lower_bound_on_p(k, alpha/2);
 279     cout << "geometric::find_lower_bound_on_p(" << int(k) << ", " << alpha/2 << ") = "
 280       << t << endl; // 0.0005
 281     t = geometric::find_upper_bound_on_p(k, alpha/2);
 282     cout << "geometric::find_upper_bound_on_p(" << int(k) << ", " << alpha/2 << ") = "
 283       << t << endl; // 0.03
 284
 285     alpha = 0.01; // 99% confidence.
 286     t = geometric::find_lower_bound_on_p(k, alpha/2);
 287     cout << "geometric::find_lower_bound_on_p(" << int(k) << ", " << alpha/2 << ") = "
 288       << t << endl; // 5e-005
 289     t = geometric::find_upper_bound_on_p(k, alpha/2);
 290     cout << "geometric::find_upper_bound_on_p(" << int(k) << ", " << alpha/2 << ") = "
 291         << t << endl; // 0.052
 292 /*`In real life, there will usually be more than one event (fault or success),
 293 when the negative binomial, which has the necessary extra parameter, will be needed.
 294 */
 295
 296 /*`As noted above, using a catch block is always a good idea,
 297 even if you hope not to use it!
 298 */
 299   }
 300   catch(const std::exception& e)
 301   { // Since we have set an overflow policy of ignore_error,
 302     // an overflow exception should never be thrown.
 303      std::cout << "\nMessage from thrown exception was:\n " << e.what() << std::endl;
 304 /*`
 305 For example, without a ignore domain error policy,
 306 if we asked for ``pdf(g, -1)`` for example,
 307 we would get an unhelpful abort, but with a catch:
 308 [pre
 309 Message from thrown exception was:
 310  Error in function boost::math::pdf(const exponential_distribution<double>&, double):
 311  Number of failures argument is -1, but must be >= 0 !
 312 ] [/pre]
 313 */
 314 //] [/ geometric_eg1_2]
 315   }
 316   return 0;
 317 }  // int main()
 318
 319
 320 /*
 321 Output is:
 322
 323   Geometric distribution example
 324
 325   success fraction of a six-sided dice is 0.1667
 326   0.1667
 327   0.1667
 328   0.1389
 329   pdf(g6, 0) + pdf(g6, 1) = 0.3056
 330   cdf(g6, 1) = 0.3056
 331   cdf(g6, 12) = 0.9065
 332   quantile(g6, 0.99) = 24.26
 333   0.1389
 334   0.3056
 335   quantile(complement(g6, 1 - p))  24.26
 336   0.5000346561579232
 337   0.5
 338    quantile(g, 0.95) = 147.28
 339   To be 95% confident of finding we customer with a fault, need to survey 148 customers.
 340   To be 99% confident of finding we customer with a fault, need to survey 227 customers.
 341   To be 80% confident of finding we customer with a fault, need to survey 79 customers.
 342   Probability of score on 1st try = 0.75
 343   Probability of score on 5th try = 0.0029
 344   Probability of score on 5th try = 5.9e-006
 345   Probability of score on 5th try = 0.031
 346   Probability is failure is 0.01
 347   geometric::find_lower_bound_on_p(100, 0.025) = 0.00025
 348   geometric::find_upper_bound_on_p(100, 0.025) = 0.037
 349   geometric::find_lower_bound_on_p(100, 0.05) = 0.00051
 350   geometric::find_upper_bound_on_p(100, 0.05) = 0.03
 351   geometric::find_lower_bound_on_p(100, 0.005) = 5e-005
 352   geometric::find_upper_bound_on_p(100, 0.005) = 0.052
 353
 354 */
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364