]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // geometric_examples.cpp |
2 | ||
3 | // Copyright Paul A. Bristow 2010. | |
4 | ||
5 | // Use, modification and distribution are subject to the | |
6 | // Boost Software License, Version 1.0. | |
7 | // (See accompanying file LICENSE_1_0.txt | |
8 | // or copy at http://www.boost.org/LICENSE_1_0.txt) | |
9 | ||
10 | // This file is written to be included from a Quickbook .qbk document. | |
11 | // It can still be compiled by the C++ compiler, and run. | |
12 | // Any output can also be added here as comment or included or pasted in elsewhere. | |
13 | // Caution: this file contains Quickbook markup as well as code | |
14 | // and comments: don't change any of the special comment markups! | |
15 | ||
16 | // Examples of using the geometric distribution. | |
17 | ||
18 | //[geometric_eg1_1 | |
19 | /*` | |
20 | For this example, we will opt to #define two macros to control | |
21 | the error and discrete handling policies. | |
22 | For this simple example, we want to avoid throwing | |
23 | an exception (the default policy) and just return infinity. | |
24 | We want to treat the distribution as if it was continuous, | |
25 | so we choose a discrete_quantile policy of real, | |
26 | rather than the default policy integer_round_outwards. | |
27 | */ | |
28 | #define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error | |
29 | #define BOOST_MATH_DISCRETE_QUANTILE_POLICY real | |
30 | /*` | |
31 | [caution It is vital to #include distributions etc *after* the above #defines] | |
32 | After that we need some includes to provide easy access to the negative binomial distribution, | |
33 | and we need some std library iostream, of course. | |
34 | */ | |
35 | #include <boost/math/distributions/geometric.hpp> | |
36 | // for geometric_distribution | |
37 | using ::boost::math::geometric_distribution; // | |
38 | using ::boost::math::geometric; // typedef provides default type is double. | |
39 | using ::boost::math::pdf; // Probability mass function. | |
40 | using ::boost::math::cdf; // Cumulative density function. | |
41 | using ::boost::math::quantile; | |
42 | ||
43 | #include <boost/math/distributions/negative_binomial.hpp> | |
44 | // for negative_binomial_distribution | |
45 | using boost::math::negative_binomial; // typedef provides default type is double. | |
46 | ||
47 | #include <boost/math/distributions/normal.hpp> | |
48 | // for negative_binomial_distribution | |
49 | using boost::math::normal; // typedef provides default type is double. | |
50 | ||
51 | #include <iostream> | |
52 | using std::cout; using std::endl; | |
53 | using std::noshowpoint; using std::fixed; using std::right; using std::left; | |
54 | #include <iomanip> | |
55 | using std::setprecision; using std::setw; | |
56 | ||
57 | #include <limits> | |
58 | using std::numeric_limits; | |
59 | //] [geometric_eg1_1] | |
60 | ||
61 | int main() | |
62 | { | |
63 | cout <<"Geometric distribution example" << endl; | |
64 | cout << endl; | |
65 | ||
66 | cout.precision(4); // But only show a few for this example. | |
67 | try | |
68 | { | |
69 | //[geometric_eg1_2 | |
70 | /*` | |
71 | It is always sensible to use try and catch blocks because defaults policies are to | |
72 | throw an exception if anything goes wrong. | |
73 | ||
74 | Simple try'n'catch blocks (see below) will ensure that you get a | |
75 | helpful error message instead of an abrupt (and silent) program abort. | |
76 | ||
77 | [h6 Throwing a dice] | |
78 | The Geometric distribution describes the probability (/p/) of a number of failures | |
79 | to get the first success in /k/ Bernoulli trials. | |
80 | (A [@http://en.wikipedia.org/wiki/Bernoulli_distribution Bernoulli trial] | |
81 | is one with only two possible outcomes, success of failure, | |
82 | and /p/ is the probability of success). | |
83 | ||
84 | Suppose an 'fair' 6-face dice is thrown repeatedly: | |
85 | */ | |
86 | double success_fraction = 1./6; // success_fraction (p) = 0.1666 | |
87 | // (so failure_fraction is 1 - success_fraction = 5./6 = 1- 0.1666 = 0.8333) | |
88 | ||
89 | /*`If the dice is thrown repeatedly until the *first* time a /three/ appears. | |
f67539c2 | 90 | The probability distribution of the number of times it is thrown *not* getting a /three/ |
7c673cae FG |
91 | (/not-a-threes/ number of failures to get a /three/) |
92 | is a geometric distribution with the success_fraction = 1/6 = 0.1666[recur]. | |
93 | ||
94 | We therefore start by constructing a geometric distribution | |
95 | with the one parameter success_fraction, the probability of success. | |
96 | */ | |
97 | geometric g6(success_fraction); // type double by default. | |
98 | /*` | |
99 | To confirm, we can echo the success_fraction parameter of the distribution. | |
100 | */ | |
101 | cout << "success fraction of a six-sided dice is " << g6.success_fraction() << endl; | |
102 | /*`So the probability of getting a three at the first throw (zero failures) is | |
103 | */ | |
104 | cout << pdf(g6, 0) << endl; // 0.1667 | |
105 | cout << cdf(g6, 0) << endl; // 0.1667 | |
106 | /*`Note that the cdf and pdf are identical because the is only one throw. | |
107 | If we want the probability of getting the first /three/ on the 2nd throw: | |
108 | */ | |
109 | cout << pdf(g6, 1) << endl; // 0.1389 | |
110 | ||
111 | /*`If we want the probability of getting the first /three/ on the 1st or 2nd throw | |
112 | (allowing one failure): | |
113 | */ | |
114 | cout << "pdf(g6, 0) + pdf(g6, 1) = " << pdf(g6, 0) + pdf(g6, 1) << endl; | |
115 | /*`Or more conveniently, and more generally, | |
116 | we can use the Cumulative Distribution Function CDF.*/ | |
117 | ||
118 | cout << "cdf(g6, 1) = " << cdf(g6, 1) << endl; // 0.3056 | |
119 | ||
120 | /*`If we allow many more (12) throws, the probability of getting our /three/ gets very high:*/ | |
121 | cout << "cdf(g6, 12) = " << cdf(g6, 12) << endl; // 0.9065 or 90% probability. | |
122 | /*`If we want to be much more confident, say 99%, | |
123 | we can estimate the number of throws to be this sure | |
124 | using the inverse or quantile. | |
125 | */ | |
126 | cout << "quantile(g6, 0.99) = " << quantile(g6, 0.99) << endl; // 24.26 | |
127 | /*`Note that the value returned is not an integer: | |
128 | if you want an integer result you should use either floor, round or ceil functions, | |
129 | or use the policies mechanism. | |
130 | ||
131 | See __understand_dis_quant. | |
132 | ||
133 | The geometric distribution is related to the negative binomial | |
134 | __spaces `negative_binomial_distribution(RealType r, RealType p);` with parameter /r/ = 1. | |
135 | So we could get the same result using the negative binomial, | |
136 | but using the geometric the results will be faster, and may be more accurate. | |
137 | */ | |
138 | negative_binomial nb(1, success_fraction); | |
139 | cout << pdf(nb, 1) << endl; // 0.1389 | |
140 | cout << cdf(nb, 1) << endl; // 0.3056 | |
141 | /*`We could also the complement to express the required probability | |
142 | as 1 - 0.99 = 0.01 (and get the same result): | |
143 | */ | |
144 | cout << "quantile(complement(g6, 1 - p)) " << quantile(complement(g6, 0.01)) << endl; // 24.26 | |
145 | /*` | |
146 | Note too that Boost.Math geometric distribution is implemented as a continuous function. | |
147 | Unlike other implementations (for example R) it *uses* the number of failures as a *real* parameter, | |
148 | not as an integer. If you want this integer behaviour, you may need to enforce this by | |
149 | rounding the parameter you pass, probably rounding down, to the nearest integer. | |
150 | For example, R returns the success fraction probability for all values of failures | |
151 | from 0 to 0.999999 thus: | |
152 | [pre | |
153 | __spaces R> formatC(pgeom(0.0001,0.5, FALSE), digits=17) " 0.5" | |
154 | ] [/pre] | |
155 | So in Boost.Math the equivalent is | |
156 | */ | |
157 | geometric g05(0.5); // Probability of success = 0.5 or 50% | |
158 | // Output all potentially significant digits for the type, here double. | |
159 | ||
160 | #ifdef BOOST_NO_CXX11_NUMERIC_LIMITS | |
161 | int max_digits10 = 2 + (boost::math::policies::digits<double, boost::math::policies::policy<> >() * 30103UL) / 100000UL; | |
162 | cout << "BOOST_NO_CXX11_NUMERIC_LIMITS is defined" << endl; | |
163 | #else | |
164 | int max_digits10 = std::numeric_limits<double>::max_digits10; | |
165 | #endif | |
166 | cout << "Show all potentially significant decimal digits std::numeric_limits<double>::max_digits10 = " | |
167 | << max_digits10 << endl; | |
168 | cout.precision(max_digits10); // | |
169 | ||
170 | cout << cdf(g05, 0.0001) << endl; // returns 0.5000346561579232, not exact 0.5. | |
171 | /*`To get the R discrete behaviour, you simply need to round with, | |
172 | for example, the `floor` function. | |
173 | */ | |
174 | cout << cdf(g05, floor(0.0001)) << endl; // returns exactly 0.5 | |
175 | /*` | |
176 | [pre | |
177 | `> formatC(pgeom(0.9999999,0.5, FALSE), digits=17) [1] " 0.25"` | |
178 | `> formatC(pgeom(1.999999,0.5, FALSE), digits=17)[1] " 0.25" k = 1` | |
179 | `> formatC(pgeom(1.9999999,0.5, FALSE), digits=17)[1] "0.12500000000000003" k = 2` | |
180 | ] [/pre] | |
181 | shows that R makes an arbitrary round-up decision at about 1e7 from the next integer above. | |
182 | This may be convenient in practice, and could be replicated in C++ if desired. | |
183 | ||
184 | [h6 Surveying customers to find one with a faulty product] | |
185 | A company knows from warranty claims that 2% of their products will be faulty, | |
186 | so the 'success_fraction' of finding a fault is 0.02. | |
187 | It wants to interview a purchaser of faulty products to assess their 'user experience'. | |
188 | ||
189 | To estimate how many customers they will probably need to contact | |
190 | in order to find one who has suffered from the fault, | |
191 | we first construct a geometric distribution with probability 0.02, | |
192 | and then chose a confidence, say 80%, 95%, or 99% to finding a customer with a fault. | |
193 | Finally, we probably want to round up the result to the integer above using the `ceil` function. | |
194 | (We could also use a policy, but that is hardly worthwhile for this simple application.) | |
195 | ||
196 | (This also assumes that each customer only buys one product: | |
197 | if customers bought more than one item, | |
198 | the probability of finding a customer with a fault obviously improves.) | |
199 | */ | |
200 | cout.precision(5); | |
201 | geometric g(0.02); // On average, 2 in 100 products are faulty. | |
202 | double c = 0.95; // 95% confidence. | |
203 | cout << " quantile(g, " << c << ") = " << quantile(g, c) << endl; | |
204 | ||
205 | cout << "To be " << c * 100 | |
206 | << "% confident of finding we customer with a fault, need to survey " | |
207 | << ceil(quantile(g, c)) << " customers." << endl; // 148 | |
208 | c = 0.99; // Very confident. | |
209 | cout << "To be " << c * 100 | |
210 | << "% confident of finding we customer with a fault, need to survey " | |
211 | << ceil(quantile(g, c)) << " customers." << endl; // 227 | |
212 | c = 0.80; // Only reasonably confident. | |
213 | cout << "To be " << c * 100 | |
214 | << "% confident of finding we customer with a fault, need to survey " | |
215 | << ceil(quantile(g, c)) << " customers." << endl; // 79 | |
216 | ||
217 | /*`[h6 Basket Ball Shooters] | |
218 | According to Wikipedia, average pro basket ball players get | |
219 | [@http://en.wikipedia.org/wiki/Free_throw free throws] | |
220 | in the baskets 70 to 80 % of the time, | |
221 | but some get as high as 95%, and others as low as 50%. | |
222 | Suppose we want to compare the probabilities | |
223 | of failing to get a score only on the first or on the fifth shot? | |
224 | To start we will consider the average shooter, say 75%. | |
225 | So we construct a geometric distribution | |
226 | with success_fraction parameter 75/100 = 0.75. | |
227 | */ | |
228 | cout.precision(2); | |
229 | geometric gav(0.75); // Shooter averages 7.5 out of 10 in the basket. | |
230 | /*`What is probability of getting 1st try in the basket, that is with no failures? */ | |
231 | cout << "Probability of score on 1st try = " << pdf(gav, 0) << endl; // 0.75 | |
232 | /*`This is, of course, the success_fraction probability 75%. | |
233 | What is the probability that the shooter only scores on the fifth shot? | |
234 | So there are 5-1 = 4 failures before the first success.*/ | |
235 | cout << "Probability of score on 5th try = " << pdf(gav, 4) << endl; // 0.0029 | |
236 | /*`Now compare this with the poor and the best players success fraction. | |
237 | We need to constructing new distributions with the different success fractions, | |
238 | and then get the corresponding probability density functions values: | |
239 | */ | |
240 | geometric gbest(0.95); | |
241 | cout << "Probability of score on 5th try = " << pdf(gbest, 4) << endl; // 5.9e-6 | |
242 | geometric gmediocre(0.50); | |
243 | cout << "Probability of score on 5th try = " << pdf(gmediocre, 4) << endl; // 0.031 | |
244 | /*`So we can see the very much smaller chance (0.000006) of 4 failures by the best shooters, | |
245 | compared to the 0.03 of the mediocre.*/ | |
246 | ||
247 | /*`[h6 Estimating failures] | |
248 | Of course one man's failure is an other man's success. | |
249 | So a fault can be defined as a 'success'. | |
250 | ||
251 | If a fault occurs once after 100 flights, then one might naively say | |
252 | that the risk of fault is obviously 1 in 100 = 1/100, a probability of 0.01. | |
253 | ||
254 | This is the best estimate we can make, but while it is the truth, | |
255 | it is not the whole truth, | |
256 | for it hides the big uncertainty when estimating from a single event. | |
257 | "One swallow doesn't make a summer." | |
258 | To show the magnitude of the uncertainty, the geometric | |
259 | (or the negative binomial) distribution can be used. | |
260 | ||
261 | If we chose the popular 95% confidence in the limits, corresponding to an alpha of 0.05, | |
262 | because we are calculating a two-sided interval, we must divide alpha by two. | |
263 | */ | |
264 | double alpha = 0.05; | |
265 | double k = 100; // So frequency of occurrence is 1/100. | |
266 | cout << "Probability is failure is " << 1/k << endl; | |
267 | double t = geometric::find_lower_bound_on_p(k, alpha/2); | |
268 | cout << "geometric::find_lower_bound_on_p(" << int(k) << ", " << alpha/2 << ") = " | |
269 | << t << endl; // 0.00025 | |
270 | t = geometric::find_upper_bound_on_p(k, alpha/2); | |
271 | cout << "geometric::find_upper_bound_on_p(" << int(k) << ", " << alpha/2 << ") = " | |
272 | << t << endl; // 0.037 | |
273 | /*`So while we estimate the probability is 0.01, it might lie between 0.0003 and 0.04. | |
274 | Even if we relax our confidence to alpha = 90%, the bounds only contract to 0.0005 and 0.03. | |
275 | And if we require a high confidence, they widen to 0.00005 to 0.05. | |
276 | */ | |
277 | alpha = 0.1; // 90% confidence. | |
278 | t = geometric::find_lower_bound_on_p(k, alpha/2); | |
279 | cout << "geometric::find_lower_bound_on_p(" << int(k) << ", " << alpha/2 << ") = " | |
280 | << t << endl; // 0.0005 | |
281 | t = geometric::find_upper_bound_on_p(k, alpha/2); | |
282 | cout << "geometric::find_upper_bound_on_p(" << int(k) << ", " << alpha/2 << ") = " | |
283 | << t << endl; // 0.03 | |
284 | ||
285 | alpha = 0.01; // 99% confidence. | |
286 | t = geometric::find_lower_bound_on_p(k, alpha/2); | |
287 | cout << "geometric::find_lower_bound_on_p(" << int(k) << ", " << alpha/2 << ") = " | |
288 | << t << endl; // 5e-005 | |
289 | t = geometric::find_upper_bound_on_p(k, alpha/2); | |
290 | cout << "geometric::find_upper_bound_on_p(" << int(k) << ", " << alpha/2 << ") = " | |
291 | << t << endl; // 0.052 | |
292 | /*`In real life, there will usually be more than one event (fault or success), | |
f67539c2 | 293 | when the negative binomial, which has the necessary extra parameter, will be needed. |
7c673cae FG |
294 | */ |
295 | ||
296 | /*`As noted above, using a catch block is always a good idea, | |
297 | even if you hope not to use it! | |
298 | */ | |
299 | } | |
300 | catch(const std::exception& e) | |
301 | { // Since we have set an overflow policy of ignore_error, | |
302 | // an overflow exception should never be thrown. | |
303 | std::cout << "\nMessage from thrown exception was:\n " << e.what() << std::endl; | |
304 | /*` | |
305 | For example, without a ignore domain error policy, | |
306 | if we asked for ``pdf(g, -1)`` for example, | |
307 | we would get an unhelpful abort, but with a catch: | |
308 | [pre | |
309 | Message from thrown exception was: | |
310 | Error in function boost::math::pdf(const exponential_distribution<double>&, double): | |
311 | Number of failures argument is -1, but must be >= 0 ! | |
312 | ] [/pre] | |
313 | */ | |
314 | //] [/ geometric_eg1_2] | |
315 | } | |
316 | return 0; | |
317 | } // int main() | |
318 | ||
319 | ||
320 | /* | |
321 | Output is: | |
322 | ||
323 | Geometric distribution example | |
324 | ||
325 | success fraction of a six-sided dice is 0.1667 | |
326 | 0.1667 | |
327 | 0.1667 | |
328 | 0.1389 | |
329 | pdf(g6, 0) + pdf(g6, 1) = 0.3056 | |
330 | cdf(g6, 1) = 0.3056 | |
331 | cdf(g6, 12) = 0.9065 | |
332 | quantile(g6, 0.99) = 24.26 | |
333 | 0.1389 | |
334 | 0.3056 | |
335 | quantile(complement(g6, 1 - p)) 24.26 | |
336 | 0.5000346561579232 | |
337 | 0.5 | |
338 | quantile(g, 0.95) = 147.28 | |
339 | To be 95% confident of finding we customer with a fault, need to survey 148 customers. | |
340 | To be 99% confident of finding we customer with a fault, need to survey 227 customers. | |
341 | To be 80% confident of finding we customer with a fault, need to survey 79 customers. | |
342 | Probability of score on 1st try = 0.75 | |
343 | Probability of score on 5th try = 0.0029 | |
344 | Probability of score on 5th try = 5.9e-006 | |
345 | Probability of score on 5th try = 0.031 | |
346 | Probability is failure is 0.01 | |
347 | geometric::find_lower_bound_on_p(100, 0.025) = 0.00025 | |
348 | geometric::find_upper_bound_on_p(100, 0.025) = 0.037 | |
349 | geometric::find_lower_bound_on_p(100, 0.05) = 0.00051 | |
350 | geometric::find_upper_bound_on_p(100, 0.05) = 0.03 | |
351 | geometric::find_lower_bound_on_p(100, 0.005) = 5e-005 | |
352 | geometric::find_upper_bound_on_p(100, 0.005) = 0.052 | |
353 | ||
354 | */ | |
355 | ||
356 | ||
357 | ||
358 | ||
359 | ||
360 | ||
361 | ||
362 | ||
363 | ||
364 |