]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | [section:negative_binomial_dist Negative Binomial Distribution] |
2 | ||
3 | ``#include <boost/math/distributions/negative_binomial.hpp>`` | |
4 | ||
5 | namespace boost{ namespace math{ | |
6 | ||
7 | template <class RealType = double, | |
8 | class ``__Policy`` = ``__policy_class`` > | |
9 | class negative_binomial_distribution; | |
10 | ||
11 | typedef negative_binomial_distribution<> negative_binomial; | |
12 | ||
13 | template <class RealType, class ``__Policy``> | |
14 | class negative_binomial_distribution | |
15 | { | |
16 | public: | |
17 | typedef RealType value_type; | |
18 | typedef Policy policy_type; | |
19 | // Constructor from successes and success_fraction: | |
20 | negative_binomial_distribution(RealType r, RealType p); | |
21 | ||
22 | // Parameter accessors: | |
23 | RealType success_fraction() const; | |
24 | RealType successes() const; | |
25 | ||
26 | // Bounds on success fraction: | |
27 | static RealType find_lower_bound_on_p( | |
28 | RealType trials, | |
29 | RealType successes, | |
30 | RealType probability); // alpha | |
31 | static RealType find_upper_bound_on_p( | |
32 | RealType trials, | |
33 | RealType successes, | |
34 | RealType probability); // alpha | |
35 | ||
36 | // Estimate min/max number of trials: | |
37 | static RealType find_minimum_number_of_trials( | |
38 | RealType k, // Number of failures. | |
39 | RealType p, // Success fraction. | |
40 | RealType probability); // Probability threshold alpha. | |
41 | static RealType find_maximum_number_of_trials( | |
42 | RealType k, // Number of failures. | |
43 | RealType p, // Success fraction. | |
44 | RealType probability); // Probability threshold alpha. | |
45 | }; | |
46 | ||
47 | }} // namespaces | |
48 | ||
49 | The class type `negative_binomial_distribution` represents a | |
50 | [@http://en.wikipedia.org/wiki/Negative_binomial_distribution negative_binomial distribution]: | |
51 | it is used when there are exactly two mutually exclusive outcomes of a | |
52 | [@http://en.wikipedia.org/wiki/Bernoulli_trial Bernoulli trial]: | |
53 | these outcomes are labelled "success" and "failure". | |
54 | ||
55 | For k + r Bernoulli trials each with success fraction p, the | |
56 | negative_binomial distribution gives the probability of observing | |
57 | k failures and r successes with success on the last trial. | |
58 | The negative_binomial distribution | |
59 | assumes that success_fraction p is fixed for all (k + r) trials. | |
60 | ||
61 | [note The random variable for the negative binomial distribution is the number of trials, | |
62 | (the number of successes is a fixed property of the distribution) | |
63 | whereas for the binomial, | |
64 | the random variable is the number of successes, for a fixed number of trials.] | |
65 | ||
66 | It has the PDF: | |
67 | ||
68 | [equation neg_binomial_ref] | |
69 | ||
70 | The following graph illustrate how the PDF varies as the success fraction | |
71 | /p/ changes: | |
72 | ||
73 | [graph negative_binomial_pdf_1] | |
74 | ||
75 | Alternatively, this graph shows how the shape of the PDF varies as | |
76 | the number of successes changes: | |
77 | ||
78 | [graph negative_binomial_pdf_2] | |
79 | ||
80 | [h4 Related Distributions] | |
81 | ||
82 | The name negative binomial distribution is reserved by some to the | |
83 | case where the successes parameter r is an integer. | |
84 | This integer version is also called the | |
85 | [@http://mathworld.wolfram.com/PascalDistribution.html Pascal distribution]. | |
86 | ||
87 | This implementation uses real numbers for the computation throughout | |
88 | (because it uses the *real-valued* incomplete beta function family of functions). | |
89 | This real-valued version is also called the Polya Distribution. | |
90 | ||
91 | The Poisson distribution is a generalization of the Pascal distribution, | |
92 | where the success parameter r is an integer: to obtain the Pascal | |
93 | distribution you must ensure that an integer value is provided for r, | |
94 | and take integer values (floor or ceiling) from functions that return | |
95 | a number of successes. | |
96 | ||
97 | For large values of r (successes), the negative binomial distribution | |
98 | converges to the Poisson distribution. | |
99 | ||
100 | The geometric distribution is a special case | |
101 | where the successes parameter r = 1, | |
102 | so only a first and only success is required. | |
103 | geometric(p) = negative_binomial(1, p). | |
104 | ||
105 | The Poisson distribution is a special case for large successes | |
106 | ||
107 | poisson([lambda]) = lim [sub r [rarr] [infin]] [space] negative_binomial(r, r / ([lambda] + r))) | |
108 | ||
109 | [discrete_quantile_warning Negative Binomial] | |
110 | ||
111 | [h4 Member Functions] | |
112 | ||
113 | [h5 Construct] | |
114 | ||
115 | negative_binomial_distribution(RealType r, RealType p); | |
116 | ||
117 | Constructor: /r/ is the total number of successes, /p/ is the | |
118 | probability of success of a single trial. | |
119 | ||
120 | Requires: `r > 0` and `0 <= p <= 1`. | |
121 | ||
122 | [h5 Accessors] | |
123 | ||
124 | RealType success_fraction() const; // successes / trials (0 <= p <= 1) | |
125 | ||
126 | Returns the parameter /p/ from which this distribution was constructed. | |
127 | ||
128 | RealType successes() const; // required successes (r > 0) | |
129 | ||
130 | Returns the parameter /r/ from which this distribution was constructed. | |
131 | ||
132 | The best method of calculation for the following functions is disputed: | |
133 | see __binomial_distrib for more discussion. | |
134 | ||
135 | [h5 Lower Bound on Parameter p] | |
136 | ||
137 | static RealType find_lower_bound_on_p( | |
138 | RealType failures, | |
139 | RealType successes, | |
140 | RealType probability) // (0 <= alpha <= 1), 0.05 equivalent to 95% confidence. | |
141 | ||
142 | Returns a *lower bound* on the success fraction: | |
143 | ||
144 | [variablelist | |
145 | [[failures][The total number of failures before the ['r]th success.]] | |
146 | [[successes][The number of successes required.]] | |
147 | [[alpha][The largest acceptable probability that the true value of | |
148 | the success fraction is [*less than] the value returned.]] | |
149 | ] | |
150 | ||
151 | For example, if you observe /k/ failures and /r/ successes from /n/ = k + r trials | |
152 | the best estimate for the success fraction is simply ['r/n], but if you | |
153 | want to be 95% sure that the true value is [*greater than] some value, | |
154 | ['p[sub min]], then: | |
155 | ||
156 | p``[sub min]`` = negative_binomial_distribution<RealType>::find_lower_bound_on_p( | |
157 | failures, successes, 0.05); | |
158 | ||
159 | [link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_conf See negative binomial confidence interval example.] | |
160 | ||
161 | This function uses the Clopper-Pearson method of computing the lower bound on the | |
162 | success fraction, whilst many texts refer to this method as giving an "exact" | |
163 | result in practice it produces an interval that guarantees ['at least] the | |
164 | coverage required, and may produce pessimistic estimates for some combinations | |
165 | of /failures/ and /successes/. See: | |
166 | ||
167 | [@http://www.ucs.louisiana.edu/~kxk4695/Discrete_new.pdf | |
168 | Yong Cai and K. Krishnamoorthy, A Simple Improved Inferential Method for Some Discrete Distributions. | |
169 | Computational statistics and data analysis, 2005, vol. 48, no3, 605-621]. | |
170 | ||
171 | [h5 Upper Bound on Parameter p] | |
172 | ||
173 | static RealType find_upper_bound_on_p( | |
174 | RealType trials, | |
175 | RealType successes, | |
176 | RealType alpha); // (0 <= alpha <= 1), 0.05 equivalent to 95% confidence. | |
177 | ||
178 | Returns an *upper bound* on the success fraction: | |
179 | ||
180 | [variablelist | |
181 | [[trials][The total number of trials conducted.]] | |
182 | [[successes][The number of successes that occurred.]] | |
183 | [[alpha][The largest acceptable probability that the true value of | |
184 | the success fraction is [*greater than] the value returned.]] | |
185 | ] | |
186 | ||
187 | For example, if you observe /k/ successes from /n/ trials the | |
188 | best estimate for the success fraction is simply ['k/n], but if you | |
189 | want to be 95% sure that the true value is [*less than] some value, | |
190 | ['p[sub max]], then: | |
191 | ||
192 | p``[sub max]`` = negative_binomial_distribution<RealType>::find_upper_bound_on_p( | |
193 | r, k, 0.05); | |
194 | ||
195 | [link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_conf See negative binomial confidence interval example.] | |
196 | ||
197 | This function uses the Clopper-Pearson method of computing the lower bound on the | |
198 | success fraction, whilst many texts refer to this method as giving an "exact" | |
199 | result in practice it produces an interval that guarantees ['at least] the | |
200 | coverage required, and may produce pessimistic estimates for some combinations | |
201 | of /failures/ and /successes/. See: | |
202 | ||
203 | [@http://www.ucs.louisiana.edu/~kxk4695/Discrete_new.pdf | |
204 | Yong Cai and K. Krishnamoorthy, A Simple Improved Inferential Method for Some Discrete Distributions. | |
205 | Computational statistics and data analysis, 2005, vol. 48, no3, 605-621]. | |
206 | ||
207 | [h5 Estimating Number of Trials to Ensure at Least a Certain Number of Failures] | |
208 | ||
209 | static RealType find_minimum_number_of_trials( | |
210 | RealType k, // number of failures. | |
211 | RealType p, // success fraction. | |
212 | RealType alpha); // probability threshold (0.05 equivalent to 95%). | |
213 | ||
214 | This functions estimates the number of trials required to achieve a certain | |
215 | probability that [*more than k failures will be observed]. | |
216 | ||
217 | [variablelist | |
218 | [[k][The target number of failures to be observed.]] | |
219 | [[p][The probability of ['success] for each trial.]] | |
220 | [[alpha][The maximum acceptable risk that only k failures or fewer will be observed.]] | |
221 | ] | |
222 | ||
223 | For example: | |
224 | ||
225 | negative_binomial_distribution<RealType>::find_minimum_number_of_trials(10, 0.5, 0.05); | |
226 | ||
227 | Returns the smallest number of trials we must conduct to be 95% sure | |
228 | of seeing 10 failures that occur with frequency one half. | |
229 | ||
230 | [link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_size_eg Worked Example.] | |
231 | ||
232 | This function uses numeric inversion of the negative binomial distribution | |
233 | to obtain the result: another interpretation of the result, is that it finds | |
234 | the number of trials (success+failures) that will lead to an /alpha/ probability | |
235 | of observing k failures or fewer. | |
236 | ||
237 | [h5 Estimating Number of Trials to Ensure a Maximum Number of Failures or Less] | |
238 | ||
239 | static RealType find_maximum_number_of_trials( | |
240 | RealType k, // number of failures. | |
241 | RealType p, // success fraction. | |
242 | RealType alpha); // probability threshold (0.05 equivalent to 95%). | |
243 | ||
244 | This functions estimates the maximum number of trials we can conduct and achieve | |
245 | a certain probability that [*k failures or fewer will be observed]. | |
246 | ||
247 | [variablelist | |
248 | [[k][The maximum number of failures to be observed.]] | |
249 | [[p][The probability of ['success] for each trial.]] | |
250 | [[alpha][The maximum acceptable ['risk] that more than k failures will be observed.]] | |
251 | ] | |
252 | ||
253 | For example: | |
254 | ||
255 | negative_binomial_distribution<RealType>::find_maximum_number_of_trials(0, 1.0-1.0/1000000, 0.05); | |
256 | ||
257 | Returns the largest number of trials we can conduct and still be 95% sure | |
258 | of seeing no failures that occur with frequency one in one million. | |
259 | ||
260 | This function uses numeric inversion of the negative binomial distribution | |
261 | to obtain the result: another interpretation of the result, is that it finds | |
262 | the number of trials (success+failures) that will lead to an /alpha/ probability | |
263 | of observing more than k failures. | |
264 | ||
265 | [h4 Non-member Accessors] | |
266 | ||
267 | All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] | |
268 | that are generic to all distributions are supported: __usual_accessors. | |
269 | ||
270 | However it's worth taking a moment to define what these actually mean in | |
271 | the context of this distribution: | |
272 | ||
273 | [table Meaning of the non-member accessors. | |
274 | [[Function][Meaning]] | |
275 | [[__pdf] | |
276 | [The probability of obtaining [*exactly k failures] from k+r trials | |
277 | with success fraction p. For example: | |
278 | ||
279 | ``pdf(negative_binomial(r, p), k)``]] | |
280 | [[__cdf] | |
281 | [The probability of obtaining [*k failures or fewer] from k+r trials | |
282 | with success fraction p and success on the last trial. For example: | |
283 | ||
284 | ``cdf(negative_binomial(r, p), k)``]] | |
285 | [[__ccdf] | |
286 | [The probability of obtaining [*more than k failures] from k+r trials | |
287 | with success fraction p and success on the last trial. For example: | |
288 | ||
289 | ``cdf(complement(negative_binomial(r, p), k))``]] | |
290 | [[__quantile] | |
291 | [The [*greatest] number of failures k expected to be observed from k+r trials | |
292 | with success fraction p, at probability P. Note that the value returned | |
293 | is a real-number, and not an integer. Depending on the use case you may | |
294 | want to take either the floor or ceiling of the real result. For example: | |
295 | ||
296 | ``quantile(negative_binomial(r, p), P)``]] | |
297 | [[__quantile_c] | |
298 | [The [*smallest] number of failures k expected to be observed from k+r trials | |
299 | with success fraction p, at probability P. Note that the value returned | |
300 | is a real-number, and not an integer. Depending on the use case you may | |
301 | want to take either the floor or ceiling of the real result. For example: | |
302 | ``quantile(complement(negative_binomial(r, p), P))``]] | |
303 | ] | |
304 | ||
305 | [h4 Accuracy] | |
306 | ||
307 | This distribution is implemented using the | |
308 | incomplete beta functions __ibeta and __ibetac: | |
309 | please refer to these functions for information on accuracy. | |
310 | ||
311 | [h4 Implementation] | |
312 | ||
313 | In the following table, /p/ is the probability that any one trial will | |
314 | be successful (the success fraction), /r/ is the number of successes, | |
315 | /k/ is the number of failures, /p/ is the probability and /q = 1-p/. | |
316 | ||
317 | [table | |
318 | [[Function][Implementation Notes]] | |
319 | [[pdf][pdf = exp(lgamma(r + k) - lgamma(r) - lgamma(k+1)) * pow(p, r) * pow((1-p), k) | |
320 | ||
321 | Implementation is in terms of __ibeta_derivative: | |
322 | ||
323 | (p/(r + k)) * ibeta_derivative(r, static_cast<RealType>(k+1), p) | |
324 | The function __ibeta_derivative is used here, since it has already | |
325 | been optimised for the lowest possible error - indeed this is really | |
326 | just a thin wrapper around part of the internals of the incomplete | |
327 | beta function. | |
328 | ]] | |
329 | [[cdf][Using the relation: | |
330 | ||
331 | cdf = I[sub p](r, k+1) = ibeta(r, k+1, p) | |
332 | ||
333 | = ibeta(r, static_cast<RealType>(k+1), p)]] | |
334 | [[cdf complement][Using the relation: | |
335 | ||
336 | 1 - cdf = I[sub p](k+1, r) | |
337 | ||
338 | = ibetac(r, static_cast<RealType>(k+1), p) | |
339 | ]] | |
340 | [[quantile][ibeta_invb(r, p, P) - 1]] | |
341 | [[quantile from the complement][ibetac_invb(r, p, Q) -1)]] | |
342 | [[mean][ `r(1-p)/p` ]] | |
343 | [[variance][ `r (1-p) / p * p` ]] | |
344 | [[mode][`floor((r-1) * (1 - p)/p)`]] | |
345 | [[skewness][`(2 - p) / sqrt(r * (1 - p))`]] | |
346 | [[kurtosis][`6 / r + (p * p) / r * (1 - p )`]] | |
347 | [[kurtosis excess][`6 / r + (p * p) / r * (1 - p ) -3`]] | |
348 | [[parameter estimation member functions][]] | |
349 | [[`find_lower_bound_on_p`][ibeta_inv(successes, failures + 1, alpha)]] | |
350 | [[`find_upper_bound_on_p`][ibetac_inv(successes, failures, alpha) plus see comments in code.]] | |
351 | [[`find_minimum_number_of_trials`][ibeta_inva(k + 1, p, alpha)]] | |
352 | [[`find_maximum_number_of_trials`][ibetac_inva(k + 1, p, alpha)]] | |
353 | ] | |
354 | ||
355 | Implementation notes: | |
356 | ||
357 | * The real concept type (that deliberately lacks the Lanczos approximation), | |
358 | was found to take several minutes to evaluate some extreme test values, | |
359 | so the test has been disabled for this type. | |
360 | ||
361 | * Much greater speed, and perhaps greater accuracy, | |
362 | might be achieved for extreme values by using a normal approximation. | |
363 | This is NOT been tested or implemented. | |
364 | ||
365 | [endsect][/section:negative_binomial_dist Negative Binomial] | |
366 | ||
367 | [/ negative_binomial.qbk | |
368 | Copyright 2006 John Maddock and Paul A. Bristow. | |
369 | Distributed under the Boost Software License, Version 1.0. | |
370 | (See accompanying file LICENSE_1_0.txt or copy at | |
371 | http://www.boost.org/LICENSE_1_0.txt). | |
372 | ] | |
373 |