]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/libs/math/doc/distributions/negative_binomial.qbk
bump version to 12.2.2-pve1
[ceph.git] / ceph / src / boost / libs / math / doc / distributions / negative_binomial.qbk
CommitLineData
7c673cae
FG
1[section:negative_binomial_dist Negative Binomial Distribution]
2
3``#include <boost/math/distributions/negative_binomial.hpp>``
4
5 namespace boost{ namespace math{
6
7 template <class RealType = double,
8 class ``__Policy`` = ``__policy_class`` >
9 class negative_binomial_distribution;
10
11 typedef negative_binomial_distribution<> negative_binomial;
12
13 template <class RealType, class ``__Policy``>
14 class negative_binomial_distribution
15 {
16 public:
17 typedef RealType value_type;
18 typedef Policy policy_type;
19 // Constructor from successes and success_fraction:
20 negative_binomial_distribution(RealType r, RealType p);
21
22 // Parameter accessors:
23 RealType success_fraction() const;
24 RealType successes() const;
25
26 // Bounds on success fraction:
27 static RealType find_lower_bound_on_p(
28 RealType trials,
29 RealType successes,
30 RealType probability); // alpha
31 static RealType find_upper_bound_on_p(
32 RealType trials,
33 RealType successes,
34 RealType probability); // alpha
35
36 // Estimate min/max number of trials:
37 static RealType find_minimum_number_of_trials(
38 RealType k, // Number of failures.
39 RealType p, // Success fraction.
40 RealType probability); // Probability threshold alpha.
41 static RealType find_maximum_number_of_trials(
42 RealType k, // Number of failures.
43 RealType p, // Success fraction.
44 RealType probability); // Probability threshold alpha.
45 };
46
47 }} // namespaces
48
49The class type `negative_binomial_distribution` represents a
50[@http://en.wikipedia.org/wiki/Negative_binomial_distribution negative_binomial distribution]:
51it is used when there are exactly two mutually exclusive outcomes of a
52[@http://en.wikipedia.org/wiki/Bernoulli_trial Bernoulli trial]:
53these outcomes are labelled "success" and "failure".
54
55For k + r Bernoulli trials each with success fraction p, the
56negative_binomial distribution gives the probability of observing
57k failures and r successes with success on the last trial.
58The negative_binomial distribution
59assumes that success_fraction p is fixed for all (k + r) trials.
60
61[note The random variable for the negative binomial distribution is the number of trials,
62(the number of successes is a fixed property of the distribution)
63whereas for the binomial,
64the random variable is the number of successes, for a fixed number of trials.]
65
66It has the PDF:
67
68[equation neg_binomial_ref]
69
70The following graph illustrate how the PDF varies as the success fraction
71/p/ changes:
72
73[graph negative_binomial_pdf_1]
74
75Alternatively, this graph shows how the shape of the PDF varies as
76the number of successes changes:
77
78[graph negative_binomial_pdf_2]
79
80[h4 Related Distributions]
81
82The name negative binomial distribution is reserved by some to the
83case where the successes parameter r is an integer.
84This integer version is also called the
85[@http://mathworld.wolfram.com/PascalDistribution.html Pascal distribution].
86
87This implementation uses real numbers for the computation throughout
88(because it uses the *real-valued* incomplete beta function family of functions).
89This real-valued version is also called the Polya Distribution.
90
91The Poisson distribution is a generalization of the Pascal distribution,
92where the success parameter r is an integer: to obtain the Pascal
93distribution you must ensure that an integer value is provided for r,
94and take integer values (floor or ceiling) from functions that return
95a number of successes.
96
97For large values of r (successes), the negative binomial distribution
98converges to the Poisson distribution.
99
100The geometric distribution is a special case
101where the successes parameter r = 1,
102so only a first and only success is required.
103geometric(p) = negative_binomial(1, p).
104
105The Poisson distribution is a special case for large successes
106
107poisson([lambda]) = lim [sub r [rarr] [infin]] [space] negative_binomial(r, r / ([lambda] + r)))
108
109[discrete_quantile_warning Negative Binomial]
110
111[h4 Member Functions]
112
113[h5 Construct]
114
115 negative_binomial_distribution(RealType r, RealType p);
116
117Constructor: /r/ is the total number of successes, /p/ is the
118probability of success of a single trial.
119
120Requires: `r > 0` and `0 <= p <= 1`.
121
122[h5 Accessors]
123
124 RealType success_fraction() const; // successes / trials (0 <= p <= 1)
125
126Returns the parameter /p/ from which this distribution was constructed.
127
128 RealType successes() const; // required successes (r > 0)
129
130Returns the parameter /r/ from which this distribution was constructed.
131
132The best method of calculation for the following functions is disputed:
133see __binomial_distrib for more discussion.
134
135[h5 Lower Bound on Parameter p]
136
137 static RealType find_lower_bound_on_p(
138 RealType failures,
139 RealType successes,
140 RealType probability) // (0 <= alpha <= 1), 0.05 equivalent to 95% confidence.
141
142Returns a *lower bound* on the success fraction:
143
144[variablelist
145[[failures][The total number of failures before the ['r]th success.]]
146[[successes][The number of successes required.]]
147[[alpha][The largest acceptable probability that the true value of
148 the success fraction is [*less than] the value returned.]]
149]
150
151For example, if you observe /k/ failures and /r/ successes from /n/ = k + r trials
152the best estimate for the success fraction is simply ['r/n], but if you
153want to be 95% sure that the true value is [*greater than] some value,
154['p[sub min]], then:
155
156 p``[sub min]`` = negative_binomial_distribution<RealType>::find_lower_bound_on_p(
157 failures, successes, 0.05);
158
159[link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_conf See negative binomial confidence interval example.]
160
161This function uses the Clopper-Pearson method of computing the lower bound on the
162success fraction, whilst many texts refer to this method as giving an "exact"
163result in practice it produces an interval that guarantees ['at least] the
164coverage required, and may produce pessimistic estimates for some combinations
165of /failures/ and /successes/. See:
166
167[@http://www.ucs.louisiana.edu/~kxk4695/Discrete_new.pdf
168Yong Cai and K. Krishnamoorthy, A Simple Improved Inferential Method for Some Discrete Distributions.
169Computational statistics and data analysis, 2005, vol. 48, no3, 605-621].
170
171[h5 Upper Bound on Parameter p]
172
173 static RealType find_upper_bound_on_p(
174 RealType trials,
175 RealType successes,
176 RealType alpha); // (0 <= alpha <= 1), 0.05 equivalent to 95% confidence.
177
178Returns an *upper bound* on the success fraction:
179
180[variablelist
181[[trials][The total number of trials conducted.]]
182[[successes][The number of successes that occurred.]]
183[[alpha][The largest acceptable probability that the true value of
184 the success fraction is [*greater than] the value returned.]]
185]
186
187For example, if you observe /k/ successes from /n/ trials the
188best estimate for the success fraction is simply ['k/n], but if you
189want to be 95% sure that the true value is [*less than] some value,
190['p[sub max]], then:
191
192 p``[sub max]`` = negative_binomial_distribution<RealType>::find_upper_bound_on_p(
193 r, k, 0.05);
194
195[link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_conf See negative binomial confidence interval example.]
196
197This function uses the Clopper-Pearson method of computing the lower bound on the
198success fraction, whilst many texts refer to this method as giving an "exact"
199result in practice it produces an interval that guarantees ['at least] the
200coverage required, and may produce pessimistic estimates for some combinations
201of /failures/ and /successes/. See:
202
203[@http://www.ucs.louisiana.edu/~kxk4695/Discrete_new.pdf
204Yong Cai and K. Krishnamoorthy, A Simple Improved Inferential Method for Some Discrete Distributions.
205Computational statistics and data analysis, 2005, vol. 48, no3, 605-621].
206
207[h5 Estimating Number of Trials to Ensure at Least a Certain Number of Failures]
208
209 static RealType find_minimum_number_of_trials(
210 RealType k, // number of failures.
211 RealType p, // success fraction.
212 RealType alpha); // probability threshold (0.05 equivalent to 95%).
213
214This functions estimates the number of trials required to achieve a certain
215probability that [*more than k failures will be observed].
216
217[variablelist
218[[k][The target number of failures to be observed.]]
219[[p][The probability of ['success] for each trial.]]
220[[alpha][The maximum acceptable risk that only k failures or fewer will be observed.]]
221]
222
223For example:
224
225 negative_binomial_distribution<RealType>::find_minimum_number_of_trials(10, 0.5, 0.05);
226
227Returns the smallest number of trials we must conduct to be 95% sure
228of seeing 10 failures that occur with frequency one half.
229
230[link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_size_eg Worked Example.]
231
232This function uses numeric inversion of the negative binomial distribution
233to obtain the result: another interpretation of the result, is that it finds
234the number of trials (success+failures) that will lead to an /alpha/ probability
235of observing k failures or fewer.
236
237[h5 Estimating Number of Trials to Ensure a Maximum Number of Failures or Less]
238
239 static RealType find_maximum_number_of_trials(
240 RealType k, // number of failures.
241 RealType p, // success fraction.
242 RealType alpha); // probability threshold (0.05 equivalent to 95%).
243
244This functions estimates the maximum number of trials we can conduct and achieve
245a certain probability that [*k failures or fewer will be observed].
246
247[variablelist
248[[k][The maximum number of failures to be observed.]]
249[[p][The probability of ['success] for each trial.]]
250[[alpha][The maximum acceptable ['risk] that more than k failures will be observed.]]
251]
252
253For example:
254
255 negative_binomial_distribution<RealType>::find_maximum_number_of_trials(0, 1.0-1.0/1000000, 0.05);
256
257Returns the largest number of trials we can conduct and still be 95% sure
258of seeing no failures that occur with frequency one in one million.
259
260This function uses numeric inversion of the negative binomial distribution
261to obtain the result: another interpretation of the result, is that it finds
262the number of trials (success+failures) that will lead to an /alpha/ probability
263of observing more than k failures.
264
265[h4 Non-member Accessors]
266
267All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
268that are generic to all distributions are supported: __usual_accessors.
269
270However it's worth taking a moment to define what these actually mean in
271the context of this distribution:
272
273[table Meaning of the non-member accessors.
274[[Function][Meaning]]
275[[__pdf]
276 [The probability of obtaining [*exactly k failures] from k+r trials
277 with success fraction p. For example:
278
279``pdf(negative_binomial(r, p), k)``]]
280[[__cdf]
281 [The probability of obtaining [*k failures or fewer] from k+r trials
282 with success fraction p and success on the last trial. For example:
283
284``cdf(negative_binomial(r, p), k)``]]
285[[__ccdf]
286 [The probability of obtaining [*more than k failures] from k+r trials
287 with success fraction p and success on the last trial. For example:
288
289``cdf(complement(negative_binomial(r, p), k))``]]
290[[__quantile]
291 [The [*greatest] number of failures k expected to be observed from k+r trials
292 with success fraction p, at probability P. Note that the value returned
293 is a real-number, and not an integer. Depending on the use case you may
294 want to take either the floor or ceiling of the real result. For example:
295
296``quantile(negative_binomial(r, p), P)``]]
297[[__quantile_c]
298 [The [*smallest] number of failures k expected to be observed from k+r trials
299 with success fraction p, at probability P. Note that the value returned
300 is a real-number, and not an integer. Depending on the use case you may
301 want to take either the floor or ceiling of the real result. For example:
302 ``quantile(complement(negative_binomial(r, p), P))``]]
303]
304
305[h4 Accuracy]
306
307This distribution is implemented using the
308incomplete beta functions __ibeta and __ibetac:
309please refer to these functions for information on accuracy.
310
311[h4 Implementation]
312
313In the following table, /p/ is the probability that any one trial will
314be successful (the success fraction), /r/ is the number of successes,
315/k/ is the number of failures, /p/ is the probability and /q = 1-p/.
316
317[table
318[[Function][Implementation Notes]]
319[[pdf][pdf = exp(lgamma(r + k) - lgamma(r) - lgamma(k+1)) * pow(p, r) * pow((1-p), k)
320
321Implementation is in terms of __ibeta_derivative:
322
323(p/(r + k)) * ibeta_derivative(r, static_cast<RealType>(k+1), p)
324The function __ibeta_derivative is used here, since it has already
325been optimised for the lowest possible error - indeed this is really
326just a thin wrapper around part of the internals of the incomplete
327beta function.
328]]
329[[cdf][Using the relation:
330
331cdf = I[sub p](r, k+1) = ibeta(r, k+1, p)
332
333= ibeta(r, static_cast<RealType>(k+1), p)]]
334[[cdf complement][Using the relation:
335
3361 - cdf = I[sub p](k+1, r)
337
338= ibetac(r, static_cast<RealType>(k+1), p)
339]]
340[[quantile][ibeta_invb(r, p, P) - 1]]
341[[quantile from the complement][ibetac_invb(r, p, Q) -1)]]
342[[mean][ `r(1-p)/p` ]]
343[[variance][ `r (1-p) / p * p` ]]
344[[mode][`floor((r-1) * (1 - p)/p)`]]
345[[skewness][`(2 - p) / sqrt(r * (1 - p))`]]
346[[kurtosis][`6 / r + (p * p) / r * (1 - p )`]]
347[[kurtosis excess][`6 / r + (p * p) / r * (1 - p ) -3`]]
348[[parameter estimation member functions][]]
349[[`find_lower_bound_on_p`][ibeta_inv(successes, failures + 1, alpha)]]
350[[`find_upper_bound_on_p`][ibetac_inv(successes, failures, alpha) plus see comments in code.]]
351[[`find_minimum_number_of_trials`][ibeta_inva(k + 1, p, alpha)]]
352[[`find_maximum_number_of_trials`][ibetac_inva(k + 1, p, alpha)]]
353]
354
355Implementation notes:
356
357* The real concept type (that deliberately lacks the Lanczos approximation),
358was found to take several minutes to evaluate some extreme test values,
359so the test has been disabled for this type.
360
361* Much greater speed, and perhaps greater accuracy,
362might be achieved for extreme values by using a normal approximation.
363This is NOT been tested or implemented.
364
365[endsect][/section:negative_binomial_dist Negative Binomial]
366
367[/ negative_binomial.qbk
368 Copyright 2006 John Maddock and Paul A. Bristow.
369 Distributed under the Boost Software License, Version 1.0.
370 (See accompanying file LICENSE_1_0.txt or copy at
371 http://www.boost.org/LICENSE_1_0.txt).
372]
373