ceph/src/boost/libs/math/example/binomial_quiz_example.cpp

   1 // Copyright Paul A. Bristow 2007, 2009, 2010
   2 // Copyright John Maddock 2006
   3
   4 // Use, modification and distribution are subject to the
   5 // Boost Software License, Version 1.0.
   6 // (See accompanying file LICENSE_1_0.txt
   7 // or copy at http://www.boost.org/LICENSE_1_0.txt)
   8
   9 // binomial_examples_quiz.cpp
  10
  11 // Simple example of computing probabilities and quantiles for a binomial random variable
  12 // representing the correct guesses on a multiple-choice test.
  13
  14 // source http://www.stat.wvu.edu/SRS/Modules/Binomial/test.html
  15
  16 //[binomial_quiz_example1
  17 /*`
  18 A multiple choice test has four possible answers to each of 16 questions.
  19 A student guesses the answer to each question,
  20 so the probability of getting a correct answer on any given question is
  21 one in four, a quarter, 1/4, 25% or fraction 0.25.
  22 The conditions of the binomial experiment are assumed to be met:
  23 n = 16 questions constitute the trials;
  24 each question results in one of two possible outcomes (correct or incorrect);
  25 the probability of being correct is 0.25 and is constant if no knowledge about the subject is assumed;
  26 the questions are answered independently if the student's answer to a question
  27 in no way influences his/her answer to another question.
  28
  29 First, we need to be able to use the binomial distribution constructor
  30 (and some std input/output, of course).
  31 */
  32
  33 #include <boost/math/distributions/binomial.hpp>
  34   using boost::math::binomial;
  35
  36 #include <iostream>
  37   using std::cout; using std::endl;
  38   using std::ios; using std::flush; using std::left; using std::right; using std::fixed;
  39 #include <iomanip>
  40   using std::setw; using std::setprecision;
  41 #include <exception>
  42
  43
  44
  45 //][/binomial_quiz_example1]
  46
  47 int main()
  48 {
  49   try
  50   {
  51   cout << "Binomial distribution example - guessing in a quiz." << endl;
  52 //[binomial_quiz_example2
  53 /*`
  54 The number of correct answers, X, is distributed as a binomial random variable
  55 with binomial distribution parameters: questions n and success fraction probability p.
  56 So we construct a binomial distribution:
  57 */
  58   int questions = 16; // All the questions in the quiz.
  59   int answers = 4; // Possible answers to each question.
  60   double success_fraction = 1. / answers; // If a random guess, p = 1/4 = 0.25.
  61   binomial quiz(questions, success_fraction);
  62 /*`
  63 and display the distribution parameters we used thus:
  64 */
  65   cout << "In a quiz with " << quiz.trials()
  66     << " questions and with a probability of guessing right of "
  67     << quiz.success_fraction() * 100 << " %"
  68     << " or 1 in " << static_cast<int>(1. / quiz.success_fraction()) << endl;
  69 /*`
  70 Show a few probabilities of just guessing:
  71 */
  72   cout << "Probability of getting none right is " << pdf(quiz, 0) << endl; // 0.010023
  73   cout << "Probability of getting exactly one right is " << pdf(quiz, 1) << endl;
  74   cout << "Probability of getting exactly two right is " << pdf(quiz, 2) << endl;
  75   int pass_score = 11;
  76   cout << "Probability of getting exactly " << pass_score << " answers right by chance is "
  77     << pdf(quiz, pass_score) << endl;
  78   cout << "Probability of getting all " << questions << " answers right by chance is "
  79     << pdf(quiz, questions) << endl;
  80 /*`
  81 [pre
  82 Probability of getting none right is 0.0100226
  83 Probability of getting exactly one right is 0.0534538
  84 Probability of getting exactly two right is 0.133635
  85 Probability of getting exactly 11 right is 0.000247132
  86 Probability of getting exactly all 16 answers right by chance is 2.32831e-010
  87 ]
  88 These don't give any encouragement to guessers!
  89
  90 We can tabulate the 'getting exactly right' ( == ) probabilities thus:
  91 */
  92   cout << "\n" "Guessed Probability" << right << endl;
  93   for (int successes = 0; successes <= questions; successes++)
  94   {
  95     double probability = pdf(quiz, successes);
  96     cout << setw(2) << successes << "      " << probability << endl;
  97   }
  98   cout << endl;
  99 /*`
 100 [pre
 101 Guessed Probability
 102  0      0.0100226
 103  1      0.0534538
 104  2      0.133635
 105  3      0.207876
 106  4      0.225199
 107  5      0.180159
 108  6      0.110097
 109  7      0.0524273
 110  8      0.0196602
 111  9      0.00582526
 112 10      0.00135923
 113 11      0.000247132
 114 12      3.43239e-005
 115 13      3.5204e-006
 116 14      2.51457e-007
 117 15      1.11759e-008
 118 16      2.32831e-010
 119 ]
 120 Then we can add the probabilities of some 'exactly right' like this:
 121 */
 122   cout << "Probability of getting none or one right is " << pdf(quiz, 0) + pdf(quiz, 1) << endl;
 123
 124 /*`
 125 [pre
 126 Probability of getting none or one right is 0.0634764
 127 ]
 128 But if more than a couple of scores are involved, it is more convenient (and may be more accurate)
 129 to use the Cumulative Distribution Function (cdf) instead:
 130 */
 131   cout << "Probability of getting none or one right is " << cdf(quiz, 1) << endl;
 132 /*`
 133 [pre
 134 Probability of getting none or one right is 0.0634764
 135 ]
 136 Since the cdf is inclusive, we can get the probability of getting up to 10 right ( <= )
 137 */
 138   cout << "Probability of getting <= 10 right (to fail) is " << cdf(quiz, 10) << endl;
 139 /*`
 140 [pre
 141 Probability of getting <= 10 right (to fail) is 0.999715
 142 ]
 143 To get the probability of getting 11 or more right (to pass),
 144 it is tempting to use ``1 - cdf(quiz, 10)`` to get the probability of > 10
 145 */
 146   cout << "Probability of getting > 10 right (to pass) is " << 1 - cdf(quiz, 10) << endl;
 147 /*`
 148 [pre
 149 Probability of getting > 10 right (to pass) is 0.000285239
 150 ]
 151 But this should be resisted in favor of using the __complements function (see __why_complements).
 152 */
 153   cout << "Probability of getting > 10 right (to pass) is " << cdf(complement(quiz, 10)) << endl;
 154 /*`
 155 [pre
 156 Probability of getting > 10 right (to pass) is 0.000285239
 157 ]
 158 And we can check that these two, <= 10 and > 10,  add up to unity.
 159 */
 160 BOOST_ASSERT((cdf(quiz, 10) + cdf(complement(quiz, 10))) == 1.);
 161 /*`
 162 If we want a < rather than a <= test, because the CDF is inclusive, we must subtract one from the score.
 163 */
 164   cout << "Probability of getting less than " << pass_score
 165     << " (< " << pass_score << ") answers right by guessing is "
 166     << cdf(quiz, pass_score -1) << endl;
 167 /*`
 168 [pre
 169 Probability of getting less than 11 (< 11) answers right by guessing is 0.999715
 170 ]
 171 and similarly to get a >= rather than a > test
 172 we also need to subtract one from the score (and can again check the sum is unity).
 173 This is because if the cdf is /inclusive/,
 174 then its complement must be /exclusive/ otherwise there would be one possible
 175 outcome counted twice!
 176 */
 177   cout << "Probability of getting at least " << pass_score
 178     << "(>= " << pass_score << ") answers right by guessing is "
 179     << cdf(complement(quiz, pass_score-1))
 180     << ", only 1 in " << 1/cdf(complement(quiz, pass_score-1)) << endl;
 181
 182   BOOST_ASSERT((cdf(quiz, pass_score -1) + cdf(complement(quiz, pass_score-1))) == 1);
 183
 184 /*`
 185 [pre
 186 Probability of getting at least 11 (>= 11) answers right by guessing is 0.000285239, only 1 in 3505.83
 187 ]
 188 Finally we can tabulate some probabilities:
 189 */
 190   cout << "\n" "At most (<=)""\n""Guessed OK   Probability" << right << endl;
 191   for (int score = 0; score <= questions; score++)
 192   {
 193     cout << setw(2) << score << "           " << setprecision(10)
 194       << cdf(quiz, score) << endl;
 195   }
 196   cout << endl;
 197 /*`
 198 [pre
 199 At most (<=)
 200 Guessed OK   Probability
 201  0           0.01002259576
 202  1           0.0634764398
 203  2           0.1971110499
 204  3           0.4049871101
 205  4           0.6301861752
 206  5           0.8103454274
 207  6           0.9204427481
 208  7           0.9728700437
 209  8           0.9925302796
 210  9           0.9983555346
 211 10           0.9997147608
 212 11           0.9999618928
 213 12           0.9999962167
 214 13           0.9999997371
 215 14           0.9999999886
 216 15           0.9999999998
 217 16           1
 218 ]
 219 */
 220   cout << "\n" "At least (>)""\n""Guessed OK   Probability" << right << endl;
 221   for (int score = 0; score <= questions; score++)
 222   {
 223     cout << setw(2) << score << "           "  << setprecision(10)
 224       << cdf(complement(quiz, score)) << endl;
 225   }
 226 /*`
 227 [pre
 228 At least (>)
 229 Guessed OK   Probability
 230  0           0.9899774042
 231  1           0.9365235602
 232  2           0.8028889501
 233  3           0.5950128899
 234  4           0.3698138248
 235  5           0.1896545726
 236  6           0.07955725188
 237  7           0.02712995629
 238  8           0.00746972044
 239  9           0.001644465374
 240 10           0.0002852391917
 241 11           3.810715862e-005
 242 12           3.783265129e-006
 243 13           2.628657967e-007
 244 14           1.140870154e-008
 245 15           2.328306437e-010
 246 16           0
 247 ]
 248 We now consider the probabilities of *ranges* of correct guesses.
 249
 250 First, calculate the probability of getting a range of guesses right,
 251 by adding the exact probabilities of each from low ... high.
 252 */
 253   int low = 3; // Getting at least 3 right.
 254   int high = 5; // Getting as most 5 right.
 255   double sum = 0.;
 256   for (int i = low; i <= high; i++)
 257   {
 258     sum += pdf(quiz, i);
 259   }
 260   cout.precision(4);
 261   cout << "Probability of getting between "
 262     << low << " and " << high << " answers right by guessing is "
 263     << sum  << endl; // 0.61323
 264 /*`
 265 [pre
 266 Probability of getting between 3 and 5 answers right by guessing is 0.6132
 267 ]
 268 Or, usually better, we can use the difference of cdfs instead:
 269 */
 270   cout << "Probability of getting between " << low << " and " << high << " answers right by guessing is "
 271     <<  cdf(quiz, high) - cdf(quiz, low - 1) << endl; // 0.61323
 272 /*`
 273 [pre
 274 Probability of getting between 3 and 5 answers right by guessing is 0.6132
 275 ]
 276 And we can also try a few more combinations of high and low choices:
 277 */
 278   low = 1; high = 6;
 279   cout << "Probability of getting between " << low << " and " << high << " answers right by guessing is "
 280     <<  cdf(quiz, high) - cdf(quiz, low - 1) << endl; // 1 and 6 P= 0.91042
 281   low = 1; high = 8;
 282   cout << "Probability of getting between " << low << " and " << high << " answers right by guessing is "
 283     <<  cdf(quiz, high) - cdf(quiz, low - 1) << endl; // 1 <= x 8 P = 0.9825
 284   low = 4; high = 4;
 285   cout << "Probability of getting between " << low << " and " << high << " answers right by guessing is "
 286     <<  cdf(quiz, high) - cdf(quiz, low - 1) << endl; // 4 <= x 4 P = 0.22520
 287
 288 /*`
 289 [pre
 290 Probability of getting between 1 and 6 answers right by guessing is 0.9104
 291 Probability of getting between 1 and 8 answers right by guessing is 0.9825
 292 Probability of getting between 4 and 4 answers right by guessing is 0.2252
 293 ]
 294 [h4 Using Binomial distribution moments]
 295 Using moments of the distribution, we can say more about the spread of results from guessing.
 296 */
 297   cout << "By guessing, on average, one can expect to get " << mean(quiz) << " correct answers." << endl;
 298   cout << "Standard deviation is " << standard_deviation(quiz) << endl;
 299   cout << "So about 2/3 will lie within 1 standard deviation and get between "
 300     <<  ceil(mean(quiz) - standard_deviation(quiz))  << " and "
 301     << floor(mean(quiz) + standard_deviation(quiz)) << " correct." << endl;
 302   cout << "Mode (the most frequent) is " << mode(quiz) << endl;
 303   cout << "Skewness is " << skewness(quiz) << endl;
 304
 305 /*`
 306 [pre
 307 By guessing, on average, one can expect to get 4 correct answers.
 308 Standard deviation is 1.732
 309 So about 2/3 will lie within 1 standard deviation and get between 3 and 5 correct.
 310 Mode (the most frequent) is 4
 311 Skewness is 0.2887
 312 ]
 313 [h4 Quantiles]
 314 The quantiles (percentiles or percentage points) for a few probability levels:
 315 */
 316   cout << "Quartiles " << quantile(quiz, 0.25) << " to "
 317     << quantile(complement(quiz, 0.25)) << endl; // Quartiles
 318   cout << "1 standard deviation " << quantile(quiz, 0.33) << " to "
 319     << quantile(quiz, 0.67) << endl; // 1 sd
 320   cout << "Deciles " << quantile(quiz, 0.1)  << " to "
 321     << quantile(complement(quiz, 0.1))<< endl; // Deciles
 322   cout << "5 to 95% " << quantile(quiz, 0.05)  << " to "
 323     << quantile(complement(quiz, 0.05))<< endl; // 5 to 95%
 324   cout << "2.5 to 97.5% " << quantile(quiz, 0.025) << " to "
 325     <<  quantile(complement(quiz, 0.025)) << endl; // 2.5 to 97.5%
 326   cout << "2 to 98% " << quantile(quiz, 0.02)  << " to "
 327     << quantile(complement(quiz, 0.02)) << endl; //  2 to 98%
 328
 329   cout << "If guessing then percentiles 1 to 99% will get " << quantile(quiz, 0.01)
 330     << " to " << quantile(complement(quiz, 0.01)) << " right." << endl;
 331 /*`
 332 Notice that these output integral values because the default policy is `integer_round_outwards`.
 333 [pre
 334 Quartiles 2 to 5
 335 1 standard deviation 2 to 5
 336 Deciles 1 to 6
 337 5 to 95% 0 to 7
 338 2.5 to 97.5% 0 to 8
 339 2 to 98% 0 to 8
 340 ]
 341 */
 342
 343 //] [/binomial_quiz_example2]
 344
 345 //[discrete_quantile_real
 346 /*`
 347 Quantiles values are controlled by the __understand_dis_quant  quantile policy chosen.
 348 The default is `integer_round_outwards`,
 349 so the lower quantile is rounded down, and the upper quantile is rounded up.
 350
 351 But we might believe that the real values tell us a little more - see __math_discrete.
 352
 353 We could control the policy for *all* distributions by
 354
 355   #define BOOST_MATH_DISCRETE_QUANTILE_POLICY real
 356
 357   at the head of the program would make this policy apply
 358 to this *one, and only*, translation unit.
 359
 360 Or we can now create a (typedef for) policy that has discrete quantiles real
 361 (here avoiding any 'using namespaces ...' statements):
 362 */
 363   using boost::math::policies::policy;
 364   using boost::math::policies::discrete_quantile;
 365   using boost::math::policies::real;
 366   using boost::math::policies::integer_round_outwards; // Default.
 367   typedef boost::math::policies::policy<discrete_quantile<real> > real_quantile_policy;
 368 /*`
 369 Add a custom binomial distribution called ``real_quantile_binomial`` that uses ``real_quantile_policy``
 370 */
 371   using boost::math::binomial_distribution;
 372   typedef binomial_distribution<double, real_quantile_policy> real_quantile_binomial;
 373 /*`
 374 Construct an object of this custom distribution:
 375 */
 376   real_quantile_binomial quiz_real(questions, success_fraction);
 377 /*`
 378 And use this to show some quantiles - that now have real rather than integer values.
 379 */
 380   cout << "Quartiles " << quantile(quiz, 0.25) << " to "
 381     << quantile(complement(quiz_real, 0.25)) << endl; // Quartiles 2 to 4.6212
 382   cout << "1 standard deviation " << quantile(quiz_real, 0.33) << " to "
 383     << quantile(quiz_real, 0.67) << endl; // 1 sd 2.6654 4.194
 384   cout << "Deciles " << quantile(quiz_real, 0.1)  << " to "
 385     << quantile(complement(quiz_real, 0.1))<< endl; // Deciles 1.3487 5.7583
 386   cout << "5 to 95% " << quantile(quiz_real, 0.05)  << " to "
 387     << quantile(complement(quiz_real, 0.05))<< endl; // 5 to 95% 0.83739 6.4559
 388   cout << "2.5 to 97.5% " << quantile(quiz_real, 0.025) << " to "
 389     <<  quantile(complement(quiz_real, 0.025)) << endl; // 2.5 to 97.5% 0.42806 7.0688
 390   cout << "2 to 98% " << quantile(quiz_real, 0.02)  << " to "
 391     << quantile(complement(quiz_real, 0.02)) << endl; //  2 to 98% 0.31311 7.7880
 392
 393   cout << "If guessing, then percentiles 1 to 99% will get " << quantile(quiz_real, 0.01)
 394     << " to " << quantile(complement(quiz_real, 0.01)) << " right." << endl;
 395 /*`
 396 [pre
 397 Real Quantiles
 398 Quartiles 2 to 4.621
 399 1 standard deviation 2.665 to 4.194
 400 Deciles 1.349 to 5.758
 401 5 to 95% 0.8374 to 6.456
 402 2.5 to 97.5% 0.4281 to 7.069
 403 2 to 98% 0.3131 to 7.252
 404 If guessing then percentiles 1 to 99% will get 0 to 7.788 right.
 405 ]
 406 */
 407
 408 //] [/discrete_quantile_real]
 409   }
 410   catch(const std::exception& e)
 411   { // Always useful to include try & catch blocks because
 412     // default policies are to throw exceptions on arguments that cause
 413     // errors like underflow, overflow.
 414     // Lacking try & catch blocks, the program will abort without a message below,
 415     // which may give some helpful clues as to the cause of the exception.
 416     std::cout <<
 417       "\n""Message from thrown exception was:\n   " << e.what() << std::endl;
 418   }
 419   return 0;
 420 } // int main()
 421
 422
 423
 424 /*
 425
 426 Output is:
 427
 428 BAutorun "i:\boost-06-05-03-1300\libs\math\test\Math_test\debug\binomial_quiz_example.exe"
 429 Binomial distribution example - guessing in a quiz.
 430 In a quiz with 16 questions and with a probability of guessing right of 25 % or 1 in 4
 431 Probability of getting none right is 0.0100226
 432 Probability of getting exactly one right is 0.0534538
 433 Probability of getting exactly two right is 0.133635
 434 Probability of getting exactly 11 answers right by chance is 0.000247132
 435 Probability of getting all 16 answers right by chance is 2.32831e-010
 436 Guessed Probability
 437  0      0.0100226
 438  1      0.0534538
 439  2      0.133635
 440  3      0.207876
 441  4      0.225199
 442  5      0.180159
 443  6      0.110097
 444  7      0.0524273
 445  8      0.0196602
 446  9      0.00582526
 447 10      0.00135923
 448 11      0.000247132
 449 12      3.43239e-005
 450 13      3.5204e-006
 451 14      2.51457e-007
 452 15      1.11759e-008
 453 16      2.32831e-010
 454 Probability of getting none or one right is 0.0634764
 455 Probability of getting none or one right is 0.0634764
 456 Probability of getting <= 10 right (to fail) is 0.999715
 457 Probability of getting > 10 right (to pass) is 0.000285239
 458 Probability of getting > 10 right (to pass) is 0.000285239
 459 Probability of getting less than 11 (< 11) answers right by guessing is 0.999715
 460 Probability of getting at least 11(>= 11) answers right by guessing is 0.000285239, only 1 in 3505.83
 461 At most (<=)
 462 Guessed OK   Probability
 463  0           0.01002259576
 464  1           0.0634764398
 465  2           0.1971110499
 466  3           0.4049871101
 467  4           0.6301861752
 468  5           0.8103454274
 469  6           0.9204427481
 470  7           0.9728700437
 471  8           0.9925302796
 472  9           0.9983555346
 473 10           0.9997147608
 474 11           0.9999618928
 475 12           0.9999962167
 476 13           0.9999997371
 477 14           0.9999999886
 478 15           0.9999999998
 479 16           1
 480 At least (>)
 481 Guessed OK   Probability
 482  0           0.9899774042
 483  1           0.9365235602
 484  2           0.8028889501
 485  3           0.5950128899
 486  4           0.3698138248
 487  5           0.1896545726
 488  6           0.07955725188
 489  7           0.02712995629
 490  8           0.00746972044
 491  9           0.001644465374
 492 10           0.0002852391917
 493 11           3.810715862e-005
 494 12           3.783265129e-006
 495 13           2.628657967e-007
 496 14           1.140870154e-008
 497 15           2.328306437e-010
 498 16           0
 499 Probability of getting between 3 and 5 answers right by guessing is 0.6132
 500 Probability of getting between 3 and 5 answers right by guessing is 0.6132
 501 Probability of getting between 1 and 6 answers right by guessing is 0.9104
 502 Probability of getting between 1 and 8 answers right by guessing is 0.9825
 503 Probability of getting between 4 and 4 answers right by guessing is 0.2252
 504 By guessing, on average, one can expect to get 4 correct answers.
 505 Standard deviation is 1.732
 506 So about 2/3 will lie within 1 standard deviation and get between 3 and 5 correct.
 507 Mode (the most frequent) is 4
 508 Skewness is 0.2887
 509 Quartiles 2 to 5
 510 1 standard deviation 2 to 5
 511 Deciles 1 to 6
 512 5 to 95% 0 to 7
 513 2.5 to 97.5% 0 to 8
 514 2 to 98% 0 to 8
 515 If guessing then percentiles 1 to 99% will get 0 to 8 right.
 516 Quartiles 2 to 4.621
 517 1 standard deviation 2.665 to 4.194
 518 Deciles 1.349 to 5.758
 519 5 to 95% 0.8374 to 6.456
 520 2.5 to 97.5% 0.4281 to 7.069
 521 2 to 98% 0.3131 to 7.252
 522 If guessing, then percentiles 1 to 99% will get 0 to 7.788 right.
 523
 524 */
 525