1 // boost\math\distributions\binomial.hpp
3 // Copyright John Maddock 2006.
4 // Copyright Paul A. Bristow 2007.
6 // Use, modification and distribution are subject to the
7 // Boost Software License, Version 1.0.
8 // (See accompanying file LICENSE_1_0.txt
9 // or copy at http://www.boost.org/LICENSE_1_0.txt)
11 // http://en.wikipedia.org/wiki/binomial_distribution
13 // Binomial distribution is the discrete probability distribution of
14 // the number (k) of successes, in a sequence of
15 // n independent (yes or no, success or failure) Bernoulli trials.
17 // It expresses the probability of a number of events occurring in a fixed time
18 // if these events occur with a known average rate (probability of success),
19 // and are independent of the time since the last event.
21 // The number of cars that pass through a certain point on a road during a given period of time.
22 // The number of spelling mistakes a secretary makes while typing a single page.
23 // The number of phone calls at a call center per minute.
24 // The number of times a web server is accessed per minute.
25 // The number of light bulbs that burn out in a certain amount of time.
26 // The number of roadkill found per unit length of road
28 // http://en.wikipedia.org/wiki/binomial_distribution
30 // Given a sample of N measured values k[i],
31 // we wish to estimate the value of the parameter x (mean)
32 // of the binomial population from which the sample was drawn.
33 // To calculate the maximum likelihood value = 1/N sum i = 1 to N of k[i]
35 // Also may want a function for EXACTLY k.
37 // And probability that there are EXACTLY k occurrences is
38 // exp(-x) * pow(x, k) / factorial(k)
39 // where x is expected occurrences (mean) during the given interval.
40 // For example, if events occur, on average, every 4 min,
41 // and we are interested in number of events occurring in 10 min,
42 // then x = 10/4 = 2.5
44 // http://www.itl.nist.gov/div898/handbook/eda/section3/eda366i.htm
46 // The binomial distribution is used when there are
47 // exactly two mutually exclusive outcomes of a trial.
48 // These outcomes are appropriately labeled "success" and "failure".
49 // The binomial distribution is used to obtain
50 // the probability of observing x successes in N trials,
51 // with the probability of success on a single trial denoted by p.
52 // The binomial distribution assumes that p is fixed for all trials.
54 // P(x, p, n) = n!/(x! * (n-x)!) * p^x * (1-p)^(n-x)
56 // http://mathworld.wolfram.com/BinomialCoefficient.html
58 // The binomial coefficient (n; k) is the number of ways of picking
59 // k unordered outcomes from n possibilities,
60 // also known as a combination or combinatorial number.
61 // The symbols _nC_k and (n; k) are used to denote a binomial coefficient,
62 // and are sometimes read as "n choose k."
63 // (n; k) therefore gives the number of k-subsets possible out of a set of n distinct items.
66 // The 2-subsets of {1,2,3,4} are the six pairs {1,2}, {1,3}, {1,4}, {2,3}, {2,4}, and {3,4}, so (4; 2)==6.
68 // http://functions.wolfram.com/GammaBetaErf/Binomial/ for evaluation.
70 // But note that the binomial distribution
71 // (like others including the poisson, negative binomial & Bernoulli)
72 // is strictly defined as a discrete function: only integral values of k are envisaged.
73 // However because of the method of calculation using a continuous gamma function,
74 // it is convenient to treat it as if a continous function,
75 // and permit non-integral values of k.
76 // To enforce the strict mathematical model, users should use floor or ceil functions
77 // on k outside this function to ensure that k is integral.
79 #ifndef BOOST_MATH_SPECIAL_BINOMIAL_HPP
80 #define BOOST_MATH_SPECIAL_BINOMIAL_HPP
82 #include <boost/math/distributions/fwd.hpp>
83 #include <boost/math/special_functions/beta.hpp> // for incomplete beta.
84 #include <boost/math/distributions/complement.hpp> // complements
85 #include <boost/math/distributions/detail/common_error_handling.hpp> // error checks
86 #include <boost/math/distributions/detail/inv_discrete_quantile.hpp> // error checks
87 #include <boost/math/special_functions/fpclassify.hpp> // isnan.
88 #include <boost/math/tools/roots.hpp> // for root finding.
97 template <class RealType, class Policy>
98 class binomial_distribution;
100 namespace binomial_detail{
101 // common error checking routines for binomial distribution functions:
102 template <class RealType, class Policy>
103 inline bool check_N(const char* function, const RealType& N, RealType* result, const Policy& pol)
105 if((N < 0) || !(boost::math::isfinite)(N))
107 *result = policies::raise_domain_error<RealType>(
109 "Number of Trials argument is %1%, but must be >= 0 !", N, pol);
114 template <class RealType, class Policy>
115 inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol)
117 if((p < 0) || (p > 1) || !(boost::math::isfinite)(p))
119 *result = policies::raise_domain_error<RealType>(
121 "Success fraction argument is %1%, but must be >= 0 and <= 1 !", p, pol);
126 template <class RealType, class Policy>
127 inline bool check_dist(const char* function, const RealType& N, const RealType& p, RealType* result, const Policy& pol)
129 return check_success_fraction(
130 function, p, result, pol)
132 function, N, result, pol);
134 template <class RealType, class Policy>
135 inline bool check_dist_and_k(const char* function, const RealType& N, const RealType& p, RealType k, RealType* result, const Policy& pol)
137 if(check_dist(function, N, p, result, pol) == false)
139 if((k < 0) || !(boost::math::isfinite)(k))
141 *result = policies::raise_domain_error<RealType>(
143 "Number of Successes argument is %1%, but must be >= 0 !", k, pol);
148 *result = policies::raise_domain_error<RealType>(
150 "Number of Successes argument is %1%, but must be <= Number of Trials !", k, pol);
155 template <class RealType, class Policy>
156 inline bool check_dist_and_prob(const char* function, const RealType& N, RealType p, RealType prob, RealType* result, const Policy& pol)
158 if((check_dist(function, N, p, result, pol) && detail::check_probability(function, prob, result, pol)) == false)
163 template <class T, class Policy>
164 T inverse_binomial_cornish_fisher(T n, T sf, T p, T q, const Policy& pol)
169 // standard deviation:
170 T sigma = sqrt(n * sf * (1 - sf));
172 T sk = (1 - 2 * sf) / sigma;
174 // T k = (1 - 6 * sf * (1 - sf) ) / (n * sf * (1 - sf));
175 // Get the inverse of a std normal distribution:
176 T x = boost::math::erfc_inv(p > q ? 2 * q : 2 * p, pol) * constants::root_two<T>();
181 // w is correction term due to skewness
182 T w = x + sk * (x2 - 1) / 6;
184 // Add on correction due to kurtosis.
185 // Disabled for now, seems to make things worse?
188 w += k * x * (x2 - 3) / 24 + sk * sk * x * (2 * x2 - 5) / -36;
191 if(w < tools::min_value<T>())
192 return sqrt(tools::min_value<T>());
198 template <class RealType, class Policy>
199 RealType quantile_imp(const binomial_distribution<RealType, Policy>& dist, const RealType& p, const RealType& q, bool comp)
200 { // Quantile or Percent Point Binomial function.
201 // Return the number of expected successes k,
202 // for a given probability p.
205 BOOST_MATH_STD_USING // ADL of std names
207 RealType trials = dist.trials();
208 RealType success_fraction = dist.success_fraction();
209 if(false == binomial_detail::check_dist_and_prob(
210 "boost::math::quantile(binomial_distribution<%1%> const&, %1%)",
222 { // There may actually be no answer to this question,
223 // since the probability of zero successes may be non-zero,
224 // but zero is the best we can do:
228 { // Probability of n or fewer successes is always one,
229 // so n is the most sensible answer here:
232 if (p <= pow(1 - success_fraction, trials))
233 { // p <= pdf(dist, 0) == cdf(dist, 0)
234 return 0; // So the only reasonable result is zero.
235 } // And root finder would fail otherwise.
236 if(success_fraction == 1)
237 { // our formulae break down in this case:
238 return p > 0.5f ? trials : 0;
241 // Solve for quantile numerically:
243 RealType guess = binomial_detail::inverse_binomial_cornish_fisher(trials, success_fraction, p, q, Policy());
246 factor = 1.01f; // guess is pretty accurate
247 else if((trials > 10) && (trials - 1 > guess) && (guess > 3))
248 factor = 1.15f; // less accurate but OK.
251 // pretty inaccurate guess in this area:
252 if(guess > trials / 64)
258 guess = trials / 1024;
261 factor = 2; // trials largish, but in far tails.
263 typedef typename Policy::discrete_quantile_type discrete_quantile_type;
264 boost::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
265 return detail::inverse_discrete_quantile(
272 discrete_quantile_type(),
278 template <class RealType = double, class Policy = policies::policy<> >
279 class binomial_distribution
282 typedef RealType value_type;
283 typedef Policy policy_type;
285 binomial_distribution(RealType n = 1, RealType p = 0.5) : m_n(n), m_p(p)
286 { // Default n = 1 is the Bernoulli distribution
287 // with equal probability of 'heads' or 'tails.
289 binomial_detail::check_dist(
290 "boost::math::binomial_distribution<%1%>::binomial_distribution",
294 } // binomial_distribution constructor.
296 RealType success_fraction() const
300 RealType trials() const
301 { // Total number of trials.
306 clopper_pearson_exact_interval,
307 jeffreys_prior_interval
311 // Estimation of the success fraction parameter.
312 // The best estimate is actually simply successes/trials,
313 // these functions are used
314 // to obtain confidence intervals for the success fraction.
316 static RealType find_lower_bound_on_p(
319 RealType probability,
320 interval_type t = clopper_pearson_exact_interval)
322 static const char* function = "boost::math::binomial_distribution<%1%>::find_lower_bound_on_p";
325 if(false == binomial_detail::check_dist_and_k(
326 function, trials, RealType(0), successes, &result, Policy())
328 binomial_detail::check_dist_and_prob(
329 function, trials, RealType(0), probability, &result, Policy()))
335 // NOTE!!! The Clopper Pearson formula uses "successes" not
336 // "successes+1" as usual to get the lower bound,
337 // see http://www.itl.nist.gov/div898/handbook/prc/section2/prc241.htm
338 return (t == clopper_pearson_exact_interval) ? ibeta_inv(successes, trials - successes + 1, probability, static_cast<RealType*>(0), Policy())
339 : ibeta_inv(successes + 0.5f, trials - successes + 0.5f, probability, static_cast<RealType*>(0), Policy());
341 static RealType find_upper_bound_on_p(
344 RealType probability,
345 interval_type t = clopper_pearson_exact_interval)
347 static const char* function = "boost::math::binomial_distribution<%1%>::find_upper_bound_on_p";
350 if(false == binomial_detail::check_dist_and_k(
351 function, trials, RealType(0), successes, &result, Policy())
353 binomial_detail::check_dist_and_prob(
354 function, trials, RealType(0), probability, &result, Policy()))
357 if(trials == successes)
360 return (t == clopper_pearson_exact_interval) ? ibetac_inv(successes + 1, trials - successes, probability, static_cast<RealType*>(0), Policy())
361 : ibetac_inv(successes + 0.5f, trials - successes + 0.5f, probability, static_cast<RealType*>(0), Policy());
363 // Estimate number of trials parameter:
365 // "How many trials do I need to be P% sure of seeing k events?"
367 // "How many trials can I have to be P% sure of seeing fewer than k events?"
369 static RealType find_minimum_number_of_trials(
370 RealType k, // number of events
371 RealType p, // success fraction
372 RealType alpha) // risk level
374 static const char* function = "boost::math::binomial_distribution<%1%>::find_minimum_number_of_trials";
377 if(false == binomial_detail::check_dist_and_k(
378 function, k, p, k, &result, Policy())
380 binomial_detail::check_dist_and_prob(
381 function, k, p, alpha, &result, Policy()))
384 result = ibetac_invb(k + 1, p, alpha, Policy()); // returns n - k
388 static RealType find_maximum_number_of_trials(
389 RealType k, // number of events
390 RealType p, // success fraction
391 RealType alpha) // risk level
393 static const char* function = "boost::math::binomial_distribution<%1%>::find_maximum_number_of_trials";
396 if(false == binomial_detail::check_dist_and_k(
397 function, k, p, k, &result, Policy())
399 binomial_detail::check_dist_and_prob(
400 function, k, p, alpha, &result, Policy()))
403 result = ibeta_invb(k + 1, p, alpha, Policy()); // returns n - k
408 RealType m_n; // Not sure if this shouldn't be an int?
409 RealType m_p; // success_fraction
410 }; // template <class RealType, class Policy> class binomial_distribution
412 typedef binomial_distribution<> binomial;
413 // typedef binomial_distribution<double> binomial;
414 // IS now included since no longer a name clash with function binomial.
415 //typedef binomial_distribution<double> binomial; // Reserved name of type double.
417 template <class RealType, class Policy>
418 const std::pair<RealType, RealType> range(const binomial_distribution<RealType, Policy>& dist)
419 { // Range of permissible values for random variable k.
420 using boost::math::tools::max_value;
421 return std::pair<RealType, RealType>(static_cast<RealType>(0), dist.trials());
424 template <class RealType, class Policy>
425 const std::pair<RealType, RealType> support(const binomial_distribution<RealType, Policy>& dist)
426 { // Range of supported values for random variable k.
427 // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
428 return std::pair<RealType, RealType>(static_cast<RealType>(0), dist.trials());
431 template <class RealType, class Policy>
432 inline RealType mean(const binomial_distribution<RealType, Policy>& dist)
433 { // Mean of Binomial distribution = np.
434 return dist.trials() * dist.success_fraction();
437 template <class RealType, class Policy>
438 inline RealType variance(const binomial_distribution<RealType, Policy>& dist)
439 { // Variance of Binomial distribution = np(1-p).
440 return dist.trials() * dist.success_fraction() * (1 - dist.success_fraction());
443 template <class RealType, class Policy>
444 RealType pdf(const binomial_distribution<RealType, Policy>& dist, const RealType& k)
445 { // Probability Density/Mass Function.
446 BOOST_FPU_EXCEPTION_GUARD
448 BOOST_MATH_STD_USING // for ADL of std functions
450 RealType n = dist.trials();
453 RealType result = 0; // initialization silences some compiler warnings
454 if(false == binomial_detail::check_dist_and_k(
455 "boost::math::pdf(binomial_distribution<%1%> const&, %1%)",
457 dist.success_fraction(),
464 // Special cases of success_fraction, regardless of k successes and regardless of n trials.
465 if (dist.success_fraction() == 0)
466 { // probability of zero successes is 1:
467 return static_cast<RealType>(k == 0 ? 1 : 0);
469 if (dist.success_fraction() == 1)
470 { // probability of n successes is 1:
471 return static_cast<RealType>(k == n ? 1 : 0);
473 // k argument may be integral, signed, or unsigned, or floating point.
474 // If necessary, it has already been promoted from an integral type.
477 return 1; // Probability = 1 = certainty.
480 { // binomial coeffic (n 0) = 1,
482 return pow(1 - dist.success_fraction(), n);
485 { // binomial coeffic (n n) = 1,
487 return pow(dist.success_fraction(), k); // * pow((1 - dist.success_fraction()), (n - k)) = 1
490 // Probability of getting exactly k successes
491 // if C(n, k) is the binomial coefficient then:
493 // f(k; n,p) = C(n, k) * p^k * (1-p)^(n-k)
494 // = (n!/(k!(n-k)!)) * p^k * (1-p)^(n-k)
495 // = (tgamma(n+1) / (tgamma(k+1)*tgamma(n-k+1))) * p^k * (1-p)^(n-k)
496 // = p^k (1-p)^(n-k) / (beta(k+1, n-k+1) * (n+1))
497 // = ibeta_derivative(k+1, n-k+1, p) / (n+1)
499 using boost::math::ibeta_derivative; // a, b, x
500 return ibeta_derivative(k+1, n-k+1, dist.success_fraction(), Policy()) / (n+1);
504 template <class RealType, class Policy>
505 inline RealType cdf(const binomial_distribution<RealType, Policy>& dist, const RealType& k)
506 { // Cumulative Distribution Function Binomial.
507 // The random variate k is the number of successes in n trials.
508 // k argument may be integral, signed, or unsigned, or floating point.
509 // If necessary, it has already been promoted from an integral type.
511 // Returns the sum of the terms 0 through k of the Binomial Probability Density/Mass:
519 // The terms are not summed directly instead
520 // the incomplete beta integral is employed,
521 // according to the formula:
522 // P = I[1-p]( n-k, k+1).
523 // = 1 - I[p](k + 1, n - k)
525 BOOST_MATH_STD_USING // for ADL of std functions
527 RealType n = dist.trials();
528 RealType p = dist.success_fraction();
532 if(false == binomial_detail::check_dist_and_k(
533 "boost::math::cdf(binomial_distribution<%1%> const&, %1%)",
546 // Special cases, regardless of k.
548 { // This need explanation:
549 // the pdf is zero for all cases except when k == 0.
550 // For zero p the probability of zero successes is one.
551 // Therefore the cdf is always 1:
552 // the probability of k or *fewer* successes is always 1
553 // if there are never any successes!
557 { // This is correct but needs explanation:
559 // all the cdf and pdf values are zero *except* when k == n,
560 // and that case has been handled above already.
564 // P = I[1-p](n - k, k + 1)
565 // = 1 - I[p](k + 1, n - k)
566 // Use of ibetac here prevents cancellation errors in calculating
567 // 1-p if p is very small, perhaps smaller than machine epsilon.
569 // Note that we do not use a finite sum here, since the incomplete
570 // beta uses a finite sum internally for integer arguments, so
571 // we'll just let it take care of the necessary logic.
573 return ibetac(k + 1, n - k, p, Policy());
576 template <class RealType, class Policy>
577 inline RealType cdf(const complemented2_type<binomial_distribution<RealType, Policy>, RealType>& c)
578 { // Complemented Cumulative Distribution Function Binomial.
579 // The random variate k is the number of successes in n trials.
580 // k argument may be integral, signed, or unsigned, or floating point.
581 // If necessary, it has already been promoted from an integral type.
583 // Returns the sum of the terms k+1 through n of the Binomial Probability Density/Mass:
591 // The terms are not summed directly instead
592 // the incomplete beta integral is employed,
593 // according to the formula:
594 // Q = 1 -I[1-p]( n-k, k+1).
595 // = I[p](k + 1, n - k)
597 BOOST_MATH_STD_USING // for ADL of std functions
599 RealType const& k = c.param;
600 binomial_distribution<RealType, Policy> const& dist = c.dist;
601 RealType n = dist.trials();
602 RealType p = dist.success_fraction();
606 if(false == binomial_detail::check_dist_and_k(
607 "boost::math::cdf(binomial_distribution<%1%> const&, %1%)",
617 { // Probability of greater than n successes is necessarily zero:
621 // Special cases, regardless of k.
624 // This need explanation: the pdf is zero for all
625 // cases except when k == 0. For zero p the probability
626 // of zero successes is one. Therefore the cdf is always
627 // 1: the probability of *more than* k successes is always 0
628 // if there are never any successes!
633 // This needs explanation, when p = 1
634 // we always have n successes, so the probability
635 // of more than k successes is 1 as long as k < n.
636 // The k == n case has already been handled above.
640 // Calculate cdf binomial using the incomplete beta function.
641 // Q = 1 -I[1-p](n - k, k + 1)
642 // = I[p](k + 1, n - k)
643 // Use of ibeta here prevents cancellation errors in calculating
644 // 1-p if p is very small, perhaps smaller than machine epsilon.
646 // Note that we do not use a finite sum here, since the incomplete
647 // beta uses a finite sum internally for integer arguments, so
648 // we'll just let it take care of the necessary logic.
650 return ibeta(k + 1, n - k, p, Policy());
653 template <class RealType, class Policy>
654 inline RealType quantile(const binomial_distribution<RealType, Policy>& dist, const RealType& p)
656 return binomial_detail::quantile_imp(dist, p, RealType(1-p), false);
659 template <class RealType, class Policy>
660 RealType quantile(const complemented2_type<binomial_distribution<RealType, Policy>, RealType>& c)
662 return binomial_detail::quantile_imp(c.dist, RealType(1-c.param), c.param, true);
665 template <class RealType, class Policy>
666 inline RealType mode(const binomial_distribution<RealType, Policy>& dist)
668 BOOST_MATH_STD_USING // ADL of std functions.
669 RealType p = dist.success_fraction();
670 RealType n = dist.trials();
671 return floor(p * (n + 1));
674 template <class RealType, class Policy>
675 inline RealType median(const binomial_distribution<RealType, Policy>& dist)
676 { // Bounds for the median of the negative binomial distribution
677 // VAN DE VEN R. ; WEBER N. C. ;
678 // Univ. Sydney, school mathematics statistics, Sydney N.S.W. 2006, AUSTRALIE
679 // Metrika (Metrika) ISSN 0026-1335 CODEN MTRKA8
680 // 1993, vol. 40, no3-4, pp. 185-189 (4 ref.)
682 // Bounds for median and 50 percetage point of binomial and negative binomial distribution
683 // Metrika, ISSN 0026-1335 (Print) 1435-926X (Online)
684 // Volume 41, Number 1 / December, 1994, DOI 10.1007/BF01895303
685 BOOST_MATH_STD_USING // ADL of std functions.
686 RealType p = dist.success_fraction();
687 RealType n = dist.trials();
688 // Wikipedia says one of floor(np) -1, floor (np), floor(np) +1
689 return floor(p * n); // Chose the middle value.
692 template <class RealType, class Policy>
693 inline RealType skewness(const binomial_distribution<RealType, Policy>& dist)
695 BOOST_MATH_STD_USING // ADL of std functions.
696 RealType p = dist.success_fraction();
697 RealType n = dist.trials();
698 return (1 - 2 * p) / sqrt(n * p * (1 - p));
701 template <class RealType, class Policy>
702 inline RealType kurtosis(const binomial_distribution<RealType, Policy>& dist)
704 RealType p = dist.success_fraction();
705 RealType n = dist.trials();
706 return 3 - 6 / n + 1 / (n * p * (1 - p));
709 template <class RealType, class Policy>
710 inline RealType kurtosis_excess(const binomial_distribution<RealType, Policy>& dist)
712 RealType p = dist.success_fraction();
714 RealType n = dist.trials();
715 return (1 - 6 * p * q) / (n * p * q);
721 // This include must be at the end, *after* the accessors
722 // for this distribution have been defined, in order to
723 // keep compilers that support two-phase lookup happy.
724 #include <boost/math/distributions/detail/derived_accessors.hpp>
726 #endif // BOOST_MATH_SPECIAL_BINOMIAL_HPP