]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | [section:geometric_dist Geometric Distribution] |
2 | ||
3 | ``#include <boost/math/distributions/geometric.hpp>`` | |
4 | ||
5 | namespace boost{ namespace math{ | |
6 | ||
7 | template <class RealType = double, | |
8 | class ``__Policy`` = ``__policy_class`` > | |
9 | class geometric_distribution; | |
10 | ||
11 | typedef geometric_distribution<> geometric; | |
12 | ||
13 | template <class RealType, class ``__Policy``> | |
14 | class geometric_distribution | |
15 | { | |
16 | public: | |
17 | typedef RealType value_type; | |
18 | typedef Policy policy_type; | |
19 | // Constructor from success_fraction: | |
20 | geometric_distribution(RealType p); | |
21 | ||
22 | // Parameter accessors: | |
23 | RealType success_fraction() const; | |
24 | RealType successes() const; | |
25 | ||
26 | // Bounds on success fraction: | |
27 | static RealType find_lower_bound_on_p( | |
28 | RealType trials, | |
29 | RealType successes, | |
30 | RealType probability); // alpha | |
31 | static RealType find_upper_bound_on_p( | |
32 | RealType trials, | |
33 | RealType successes, | |
34 | RealType probability); // alpha | |
35 | ||
36 | // Estimate min/max number of trials: | |
37 | static RealType find_minimum_number_of_trials( | |
38 | RealType k, // Number of failures. | |
39 | RealType p, // Success fraction. | |
40 | RealType probability); // Probability threshold alpha. | |
41 | static RealType find_maximum_number_of_trials( | |
42 | RealType k, // Number of failures. | |
43 | RealType p, // Success fraction. | |
44 | RealType probability); // Probability threshold alpha. | |
45 | }; | |
46 | ||
47 | }} // namespaces | |
48 | ||
49 | The class type `geometric_distribution` represents a | |
50 | [@http://en.wikipedia.org/wiki/geometric_distribution geometric distribution]: | |
51 | it is used when there are exactly two mutually exclusive outcomes of a | |
52 | [@http://en.wikipedia.org/wiki/Bernoulli_trial Bernoulli trial]: | |
53 | these outcomes are labelled "success" and "failure". | |
54 | ||
55 | For [@http://en.wikipedia.org/wiki/Bernoulli_trial Bernoulli trials] | |
56 | each with success fraction /p/, the geometric distribution gives | |
57 | the probability of observing /k/ trials (failures, events, occurrences, or arrivals) | |
58 | before the first success. | |
59 | ||
60 | [note For this implementation, the set of trials *includes zero* | |
61 | (unlike another definition where the set of trials starts at one, sometimes named /shifted/).] | |
62 | The geometric distribution assumes that success_fraction /p/ is fixed for all /k/ trials. | |
63 | ||
64 | The probability that there are /k/ failures before the first success is | |
65 | ||
66 | __spaces Pr(Y=/k/) = (1-/p/)[super /k/]/p/ | |
67 | ||
68 | For example, when throwing a 6-face dice the success probability /p/ = 1/6 = 0.1666[recur][space]. | |
69 | Throwing repeatedly until a /three/ appears, | |
70 | the probability distribution of the number of times /not-a-three/ is thrown | |
71 | is geometric. | |
72 | ||
73 | Geometric distribution has the Probability Density Function PDF: | |
74 | ||
75 | __spaces (1-/p/)[super /k/]/p/ | |
76 | ||
77 | The following graph illustrates how the PDF and CDF vary for three examples | |
78 | of the success fraction /p/, | |
79 | (when considering the geometric distribution as a continuous function), | |
80 | ||
81 | [graph geometric_pdf_2] | |
82 | ||
83 | [graph geometric_cdf_2] | |
84 | ||
85 | and as discrete. | |
86 | ||
87 | [graph geometric_pdf_discrete] | |
88 | ||
89 | [graph geometric_cdf_discrete] | |
90 | ||
91 | ||
92 | [h4 Related Distributions] | |
93 | ||
94 | The geometric distribution is a special case of | |
95 | the __negative_binomial_distrib with successes parameter /r/ = 1, | |
96 | so only one first and only success is required : thus by definition | |
97 | __spaces `geometric(p) == negative_binomial(1, p)` | |
98 | ||
99 | negative_binomial_distribution(RealType r, RealType success_fraction); | |
100 | negative_binomial nb(1, success_fraction); | |
101 | geometric g(success_fraction); | |
102 | ASSERT(pdf(nb, 1) == pdf(g, 1)); | |
103 | ||
104 | This implementation uses real numbers for the computation throughout | |
105 | (because it uses the *real-valued* power and exponential functions). | |
106 | So to obtain a conventional strictly-discrete geometric distribution | |
107 | you must ensure that an integer value is provided for the number of trials | |
108 | (random variable) /k/, | |
109 | and take integer values (floor or ceil functions) from functions that return | |
110 | a number of successes. | |
111 | ||
112 | [discrete_quantile_warning geometric] | |
113 | ||
114 | [h4 Member Functions] | |
115 | ||
116 | [h5 Constructor] | |
117 | ||
118 | geometric_distribution(RealType p); | |
119 | ||
120 | Constructor: /p/ or success_fraction is the probability of success of a single trial. | |
121 | ||
122 | Requires: `0 <= p <= 1`. | |
123 | ||
124 | [h5 Accessors] | |
125 | ||
126 | RealType success_fraction() const; // successes / trials (0 <= p <= 1) | |
127 | ||
128 | Returns the success_fraction parameter /p/ from which this distribution was constructed. | |
129 | ||
130 | RealType successes() const; // required successes always one, | |
131 | // included for compatibility with negative binomial distribution | |
132 | // with successes r == 1. | |
133 | ||
134 | Returns unity. | |
135 | ||
136 | The following functions are equivalent to those provided for the negative binomial, | |
137 | with successes = 1, but are provided here for completeness. | |
138 | ||
139 | The best method of calculation for the following functions is disputed: | |
140 | see __binomial_distrib and __negative_binomial_distrib for more discussion. | |
141 | ||
142 | [h5 Lower Bound on success_fraction Parameter ['p]] | |
143 | ||
144 | static RealType find_lower_bound_on_p( | |
145 | RealType failures, | |
146 | RealType probability) // (0 <= alpha <= 1), 0.05 equivalent to 95% confidence. | |
147 | ||
148 | Returns a *lower bound* on the success fraction: | |
149 | ||
150 | [variablelist | |
151 | [[failures][The total number of failures before the 1st success.]] | |
152 | [[alpha][The largest acceptable probability that the true value of | |
153 | the success fraction is [*less than] the value returned.]] | |
154 | ] | |
155 | ||
156 | For example, if you observe /k/ failures from /n/ trials | |
157 | the best estimate for the success fraction is simply 1/['n], but if you | |
158 | want to be 95% sure that the true value is [*greater than] some value, | |
159 | ['p[sub min]], then: | |
160 | ||
161 | p``[sub min]`` = geometric_distribution<RealType>:: | |
162 | find_lower_bound_on_p(failures, 0.05); | |
163 | ||
164 | [link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_conf See negative_binomial confidence interval example.] | |
165 | ||
166 | This function uses the Clopper-Pearson method of computing the lower bound on the | |
167 | success fraction, whilst many texts refer to this method as giving an "exact" | |
168 | result in practice it produces an interval that guarantees ['at least] the | |
169 | coverage required, and may produce pessimistic estimates for some combinations | |
170 | of /failures/ and /successes/. See: | |
171 | ||
172 | [@http://www.ucs.louisiana.edu/~kxk4695/Discrete_new.pdf | |
173 | Yong Cai and K. Krishnamoorthy, A Simple Improved Inferential Method for Some Discrete Distributions. | |
174 | Computational statistics and data analysis, 2005, vol. 48, no3, 605-621]. | |
175 | ||
176 | [h5 Upper Bound on success_fraction Parameter p] | |
177 | ||
178 | static RealType find_upper_bound_on_p( | |
179 | RealType trials, | |
180 | RealType alpha); // (0 <= alpha <= 1), 0.05 equivalent to 95% confidence. | |
181 | ||
182 | Returns an *upper bound* on the success fraction: | |
183 | ||
184 | [variablelist | |
185 | [[trials][The total number of trials conducted.]] | |
186 | [[alpha][The largest acceptable probability that the true value of | |
187 | the success fraction is [*greater than] the value returned.]] | |
188 | ] | |
189 | ||
190 | For example, if you observe /k/ successes from /n/ trials the | |
191 | best estimate for the success fraction is simply ['k/n], but if you | |
192 | want to be 95% sure that the true value is [*less than] some value, | |
193 | ['p[sub max]], then: | |
194 | ||
195 | p``[sub max]`` = geometric_distribution<RealType>::find_upper_bound_on_p( | |
196 | k, 0.05); | |
197 | ||
198 | [link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_conf See negative binomial confidence interval example.] | |
199 | ||
200 | This function uses the Clopper-Pearson method of computing the lower bound on the | |
201 | success fraction, whilst many texts refer to this method as giving an "exact" | |
202 | result in practice it produces an interval that guarantees ['at least] the | |
203 | coverage required, and may produce pessimistic estimates for some combinations | |
204 | of /failures/ and /successes/. See: | |
205 | ||
206 | [@http://www.ucs.louisiana.edu/~kxk4695/Discrete_new.pdf | |
207 | Yong Cai and K. Krishnamoorthy, A Simple Improved Inferential Method for Some Discrete Distributions. | |
208 | Computational statistics and data analysis, 2005, vol. 48, no3, 605-621]. | |
209 | ||
210 | [h5 Estimating Number of Trials to Ensure at Least a Certain Number of Failures] | |
211 | ||
212 | static RealType find_minimum_number_of_trials( | |
213 | RealType k, // number of failures. | |
214 | RealType p, // success fraction. | |
215 | RealType alpha); // probability threshold (0.05 equivalent to 95%). | |
216 | ||
217 | This functions estimates the number of trials required to achieve a certain | |
218 | probability that [*more than ['k] failures will be observed]. | |
219 | ||
220 | [variablelist | |
221 | [[k][The target number of failures to be observed.]] | |
222 | [[p][The probability of ['success] for each trial.]] | |
223 | [[alpha][The maximum acceptable ['risk] that only ['k] failures or fewer will be observed.]] | |
224 | ] | |
225 | ||
226 | For example: | |
227 | ||
228 | geometric_distribution<RealType>::find_minimum_number_of_trials(10, 0.5, 0.05); | |
229 | ||
230 | Returns the smallest number of trials we must conduct to be 95% (1-0.05) sure | |
231 | of seeing 10 failures that occur with frequency one half. | |
232 | ||
233 | [link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_size_eg Worked Example.] | |
234 | ||
235 | This function uses numeric inversion of the geometric distribution | |
236 | to obtain the result: another interpretation of the result is that it finds | |
237 | the number of trials (failures) that will lead to an /alpha/ probability | |
238 | of observing /k/ failures or fewer. | |
239 | ||
240 | [h5 Estimating Number of Trials to Ensure a Maximum Number of Failures or Less] | |
241 | ||
242 | static RealType find_maximum_number_of_trials( | |
243 | RealType k, // number of failures. | |
244 | RealType p, // success fraction. | |
245 | RealType alpha); // probability threshold (0.05 equivalent to 95%). | |
246 | ||
247 | This functions estimates the maximum number of trials we can conduct and achieve | |
248 | a certain probability that [*k failures or fewer will be observed]. | |
249 | ||
250 | [variablelist | |
251 | [[k][The maximum number of failures to be observed.]] | |
252 | [[p][The probability of ['success] for each trial.]] | |
253 | [[alpha][The maximum acceptable ['risk] that more than ['k] failures will be observed.]] | |
254 | ] | |
255 | ||
256 | For example: | |
257 | ||
258 | geometric_distribution<RealType>::find_maximum_number_of_trials(0, 1.0-1.0/1000000, 0.05); | |
259 | ||
260 | Returns the largest number of trials we can conduct and still be 95% sure | |
261 | of seeing no failures that occur with frequency one in one million. | |
262 | ||
263 | This function uses numeric inversion of the geometric distribution | |
264 | to obtain the result: another interpretation of the result, is that it finds | |
265 | the number of trials that will lead to an /alpha/ probability | |
266 | of observing more than k failures. | |
267 | ||
268 | [h4 Non-member Accessors] | |
269 | ||
270 | All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] | |
271 | that are generic to all distributions are supported: __usual_accessors. | |
272 | ||
273 | However it's worth taking a moment to define what these actually mean in | |
274 | the context of this distribution: | |
275 | ||
276 | [table Meaning of the non-member accessors. | |
277 | [[Function][Meaning]] | |
278 | [[__pdf] | |
279 | [The probability of obtaining [*exactly k failures] from /k/ trials | |
280 | with success fraction p. For example: | |
281 | ||
282 | ``pdf(geometric(p), k)``]] | |
283 | [[__cdf] | |
284 | [The probability of obtaining [*k failures or fewer] from /k/ trials | |
285 | with success fraction p and success on the last trial. For example: | |
286 | ||
287 | ``cdf(geometric(p), k)``]] | |
288 | [[__ccdf] | |
289 | [The probability of obtaining [*more than k failures] from /k/ trials | |
290 | with success fraction p and success on the last trial. For example: | |
291 | ||
292 | ``cdf(complement(geometric(p), k))``]] | |
293 | [[__quantile] | |
294 | [The [*greatest] number of failures /k/ expected to be observed from /k/ trials | |
295 | with success fraction /p/, at probability /P/. Note that the value returned | |
296 | is a real-number, and not an integer. Depending on the use case you may | |
297 | want to take either the floor or ceiling of the real result. For example: | |
298 | ``quantile(geometric(p), P)``]] | |
299 | [[__quantile_c] | |
300 | [The [*smallest] number of failures /k/ expected to be observed from /k/ trials | |
301 | with success fraction /p/, at probability /P/. Note that the value returned | |
302 | is a real-number, and not an integer. Depending on the use case you may | |
303 | want to take either the floor or ceiling of the real result. For example: | |
304 | ``quantile(complement(geometric(p), P))``]] | |
305 | ] | |
306 | ||
307 | [h4 Accuracy] | |
308 | ||
309 | This distribution is implemented using the pow and exp functions, so most results | |
310 | are accurate within a few epsilon for the RealType. | |
311 | For extreme values of `double` /p/, for example 0.9999999999, | |
312 | accuracy can fall significantly, for example to 10 decimal digits (from 16). | |
313 | ||
314 | [h4 Implementation] | |
315 | ||
316 | In the following table, /p/ is the probability that any one trial will | |
317 | be successful (the success fraction), /k/ is the number of failures, | |
318 | /p/ is the probability and /q = 1-p/, | |
319 | /x/ is the given probability to estimate | |
320 | the expected number of failures using the quantile. | |
321 | ||
322 | [table | |
323 | [[Function][Implementation Notes]] | |
324 | [[pdf][pdf = p * pow(q, k)]] | |
325 | [[cdf][cdf = 1 - q[super k=1]]] | |
326 | [[cdf complement][exp(log1p(-p) * (k+1))]] | |
327 | [[quantile][k = log1p(-x) / log1p(-p) -1]] | |
328 | [[quantile from the complement][k = log(x) / log1p(-p) -1]] | |
329 | [[mean][(1-p)/p]] | |
330 | [[variance][(1-p)/p[sup2]]] | |
331 | [[mode][0]] | |
332 | [[skewness][(2-p)/[sqrt]q]] | |
333 | [[kurtosis][9+p[sup2]/q]] | |
334 | [[kurtosis excess][6 +p[sup2]/q]] | |
335 | [[parameter estimation member functions][See __negative_binomial_distrib]] | |
336 | [[`find_lower_bound_on_p`][See __negative_binomial_distrib]] | |
337 | [[`find_upper_bound_on_p`][See __negative_binomial_distrib]] | |
338 | [[`find_minimum_number_of_trials`][See __negative_binomial_distrib]] | |
339 | [[`find_maximum_number_of_trials`][See __negative_binomial_distrib]] | |
340 | ] | |
341 | ||
342 | [endsect][/section:geometric_dist geometric] | |
343 | ||
344 | [/ geometric.qbk | |
345 | Copyright 2010 John Maddock and Paul A. Bristow. | |
346 | Distributed under the Boost Software License, Version 1.0. | |
347 | (See accompanying file LICENSE_1_0.txt or copy at | |
348 | http://www.boost.org/LICENSE_1_0.txt). | |
349 | ] | |
350 |